Pattern #1 (456 models)
-pre_tokenizer: Sequence(pretokenizers=[Split(pattern=Regex("(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+..."), behavior=Isolated, invert=False), ByteLevel(add_prefix_space=False, trim_offsets=True, use_regex=False)])
+pre_tokenizer: Sequence(pretokenizers=[Split(pattern=Regex("(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+..."), behavior=Isolated, invert=False), ByteLevel(add_prefix_space=False, trim_offsets=False, use_regex=False)])
-decoder: ByteLevel(add_prefix_space=True, trim_offsets=True, use_regex=True)
+decoder: ByteLevel(add_prefix_space=False, trim_offsets=False, use_regex=False)
Pattern #2 (285 models)
-normalizer: Sequence(normalizers=[Strip(strip_left=False, strip_right=True), Replace(pattern=Regex(" {2,}"), content="▁"), Precompiled(precompiled_charsmap="ALQCAACEAAAAAACAAQAAgMz8AgC4BQAAhyIAgMzkAgC4PQAAeyIAgMzsAgC4BQAAiyIAgMw8AADNvAAAmwkAgJ4JAIChCQCAgx0A...")])
-pre_tokenizer: Metaspace(replacement="▁", prepend_scheme=always, split=True)
+normalizer: Precompiled(precompiled_charsmap="ALQCAACEAAAAAACAAQAAgMz8AgC4BQAAhyIAgMzkAgC4PQAAeyIAgMzsAgC4BQAAiyIAgMw8AADNvAAAmwkAgJ4JAIChCQCAgx0A...")
+pre_tokenizer: Sequence(pretokenizers=[WhitespaceSplit(), Metaspace(replacement="▁", prepend_scheme=always, split=True)])
Pattern #3 (263 models)
-decoder: ByteLevel(add_prefix_space=True, trim_offsets=True, use_regex=True)
+decoder: ByteLevel(add_prefix_space=False, trim_offsets=True, use_regex=True)
Pattern #4 (178 models)
-pre_tokenizer: ByteLevel(add_prefix_space=False, trim_offsets=True, use_regex=True)
+pre_tokenizer: Sequence(pretokenizers=[Split(pattern=Regex("(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]..."), behavior=Removed, invert=True), ByteLevel(add_prefix_space=False, trim_offsets=True, use_regex=False)])
Pattern #5 (157 models)
-AddedToken("<mask>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True)
+AddedToken("<mask>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
Pattern #6 (148 models)
-normalizer: Sequence(normalizers=[Replace(pattern=Regex("\n"), content=" "), Replace(pattern=Regex(" {2,}"), content=" ")])
-pre_tokenizer: Metaspace(replacement="▁", prepend_scheme=always, split=True)
+normalizer: Sequence(normalizers=[Precompiled(precompiled_charsmap="ALQCAACEAAAAAACAAQAAgMz8AgC4BQAAhyIAgMzkAgC4PQAAeyIAgMzsAgC4BQAAiyIAgMw8AADNvAAAmwkAgJ4JAIChCQCAgx0A..."), Replace(pattern=Regex(" {2,}"), content=" ")])
+pre_tokenizer: Sequence(pretokenizers=[WhitespaceSplit(), Metaspace(replacement="▁", prepend_scheme=always, split=True)])
Pattern #7 (119 models)
-pre_tokenizer: ByteLevel(add_prefix_space=False, trim_offsets=True, use_regex=True)
+pre_tokenizer: Sequence(pretokenizers=[Digits(individual_digits=True), ByteLevel(add_prefix_space=False, trim_offsets=True, use_regex=True)])
Pattern #8 (104 models)
-normalizer: None
-pre_tokenizer: Metaspace(replacement="▁", prepend_scheme=first, split=False)
+normalizer: Sequence(normalizers=[Prepend(prepend="▁"), Replace(pattern=String(" "), content="▁")])
+pre_tokenizer: None
Pattern #9 (98 models)
-pre_tokenizer: Sequence(pretokenizers=[Split(pattern=Regex("<\|startoftext\|>|<\|endoftext\|>|'s|'t|'re|'ve|'m|'ll|'d|[\p{L}]+|[\p{N}]|[^\s\p{L}\p{N}]+"), behavior=Removed, invert=True), ByteLevel(add_prefix_space=False, trim_offsets=True, use_regex=True)])
+pre_tokenizer: Sequence(pretokenizers=[Split(pattern=Regex("'s|'t|'re|'ve|'m|'ll|'d|[\p{L}]+|[\p{N}]|[^\s\p{L}\p{N}]+"), behavior=Removed, invert=True), ByteLevel(add_prefix_space=False, trim_offsets=True, use_regex=True)])
Pattern #10 (82 models)
-normalizer: Sequence(normalizers=[Strip(strip_left=False, strip_right=True), Replace(pattern=Regex(" {2,}"), content="▁")])
+normalizer: Sequence(normalizers=[Replace(pattern=Regex(" {2,}"), content=" ")])
Pattern #11 (57 models)
-normalizer: BertNormalizer(clean_text=True, handle_chinese_chars=True, strip_accents=None, lowercase=False)
+normalizer: BertNormalizer(clean_text=True, handle_chinese_chars=True, strip_accents=None, lowercase=True)
Pattern #12 (53 models)
-normalizer: None
-pre_tokenizer: Metaspace(replacement="▁", prepend_scheme=always, split=False)
+normalizer: Sequence(normalizers=[Prepend(prepend="▁"), Replace(pattern=String(" "), content="▁")])
+pre_tokenizer: None
Pattern #13 (40 models)
-post_processor: RobertaProcessing(sep=("</s>", 2), cls=("<s>", 1), trim_offsets=True, add_prefix_space=True)
+post_processor: TemplateProcessing(single=[Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0)], pair=[Sequence(id=A, type_id=0), Sequence(id=B, type_id=1)], special_tokens={"</s>":SpecialToken(id="</s>", ids=[2], tokens=["</s>"])})
Pattern #14 (31 models)
-normalizer: Sequence(normalizers=[Strip(strip_left=False, strip_right=False), Replace(pattern=Regex(" {2,}"), content="▁")])
+normalizer: Sequence(normalizers=[Replace(pattern=Regex(" {2,}"), content=" ")])
Pattern #15 (29 models)
-normalizer: Sequence(normalizers=[NFD(), Lowercase(), StripAccents()])
+normalizer: BertNormalizer(clean_text=True, handle_chinese_chars=True, strip_accents=None, lowercase=True)
Pattern #16 (23 models)
-post_processor: TemplateProcessing(single=[SpecialToken(id="[CLS]", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="[SEP]", type_id=0)], pair=[SpecialToken(id="[CLS]", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="[SEP]", type_id=0), SpecialToken(id="[SEP]", type_id=0), Sequence(id=B, type_id=0), ...], special_tokens={"[CLS]":SpecialToken(id="[CLS]", ids=[1], tokens=["[CLS]"]), "[SEP]":SpecialToken(id="[SEP]", ids=[2], tokens=["[SEP]"])})
+post_processor: TemplateProcessing(single=[SpecialToken(id="[CLS]", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="[SEP]", type_id=0)], pair=[SpecialToken(id="[CLS]", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="[SEP]", type_id=0), Sequence(id=B, type_id=1), SpecialToken(id="[SEP]", type_id=1)], special_tokens={"[CLS]":SpecialToken(id="[CLS]", ids=[1], tokens=["[CLS]"]), "[SEP]":SpecialToken(id="[SEP]", ids=[2], tokens=["[SEP]"])})
Pattern #17 (17 models)
-normalizer: Sequence(normalizers=[Strip(strip_left=False, strip_right=True), Replace(pattern=String(" {2,}"), content="▁")])
-pre_tokenizer: Sequence(pretokenizers=[WhitespaceSplit(), Metaspace(replacement="▁", prepend_scheme=always, split=True)])
-post_processor: TemplateProcessing(single=[SpecialToken(id="<s>", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0)], pair=[SpecialToken(id="<s>", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0), Sequence(id=B, type_id=0), SpecialToken(id="</s>", type_id=0)], special_tokens={"</s>":SpecialToken(id="</s>", ids=[2], tokens=["</s>"]), "<s>":SpecialToken(id="<s>", ids=[0], tokens=["<s>"])})
+normalizer: Sequence(normalizers=[Precompiled(precompiled_charsmap="ALQCAACEAAAAAACAAQAAgMz8AgC4BQAAjSIAgMzkAgC4PQAAgSIAgMzsAgC4BQAAkSIAgMw8AADNvAAAngkAgKEJAICkCQCAgx0A..."), Replace(pattern=Regex(" {2,}"), content=" ")])
+pre_tokenizer: Metaspace(replacement="▁", prepend_scheme=always, split=True)
+post_processor: TemplateProcessing(single=[SpecialToken(id="<s>", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0)], pair=[SpecialToken(id="<s>", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0), SpecialToken(id="</s>", type_id=0), Sequence(id=B, type_id=0), ...], special_tokens={"</s>":SpecialToken(id="</s>", ids=[2], tokens=["</s>"]), "<s>":SpecialToken(id="<s>", ids=[0], tokens=["<s>"])})
Pattern #18 (15 models)
-normalizer: Sequence(normalizers=[Replace(pattern=Regex("\s{2,}|[\n\r\t]"), content=" "), NFC(), Strip(strip_left=False, strip_right=True)])
+normalizer: Sequence(normalizers=[Precompiled(precompiled_charsmap="ALQCAACEAAAAAACAAQAAgMz8AgC4BQAAhyIAgMzkAgC4PQAAeyIAgMzsAgC4BQAAiyIAgMw8AADNvAAAmwkAgJ4JAIChCQCAgx0A..."), Replace(pattern=Regex(" {2,}"), content=" ")])
Pattern #19 (14 models)
-normalizer: Sequence(normalizers=[Replace(pattern=Regex("[\n\r\t]"), content=" "), NFKC(), Replace(pattern=Regex(" {2,}"), content=" ")])
+normalizer: Sequence(normalizers=[Precompiled(precompiled_charsmap="ALQCAACEAAAAAACAAQAAgMz8AgC4BQAAhyIAgMzkAgC4PQAAeyIAgMzsAgC4BQAAiyIAgMw8AADNvAAAmwkAgJ4JAIChCQCAgx0A..."), Replace(pattern=Regex(" {2,}"), content=" ")])
-post_processor: TemplateProcessing(single=[SpecialToken(id="eng_Latn", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0)], pair=[SpecialToken(id="eng_Latn", type_id=0), Sequence(id=A, type_id=0), Sequence(id=B, type_id=0), SpecialToken(id="</s>", type_id=0)], special_tokens={"</s>":SpecialToken(id="</s>", ids=[2], tokens=["</s>"]), "eng_Latn":SpecialToken(id="eng_Latn", ids=[256047], tokens=["eng_Latn"])})
+post_processor: TemplateProcessing(single=[Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0), SpecialToken(id="<unk>", type_id=0)], pair=[Sequence(id=A, type_id=0), Sequence(id=B, type_id=0), SpecialToken(id="</s>", type_id=0), SpecialToken(id="<unk>", type_id=0)], special_tokens={"</s>":SpecialToken(id="</s>", ids=[2], tokens=["</s>"]), "<unk>":SpecialToken(id="<unk>", ids=[3], tokens=["<unk>"])})
Pattern #20 (12 models)
-normalizer: Sequence(normalizers=[Strip(strip_left=False, strip_right=True), Replace(pattern=String(" {2,}"), content="▁")])
-pre_tokenizer: Sequence(pretokenizers=[WhitespaceSplit(), Metaspace(replacement="▁", prepend_scheme=always, split=True)])
-post_processor: TemplateProcessing(single=[SpecialToken(id="<s>", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0)], pair=[SpecialToken(id="<s>", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0), Sequence(id=B, type_id=0), SpecialToken(id="</s>", type_id=0)], special_tokens={"</s>":SpecialToken(id="</s>", ids=[2], tokens=["</s>"]), "<s>":SpecialToken(id="<s>", ids=[0], tokens=["<s>"])})
+normalizer: Sequence(normalizers=[Precompiled(precompiled_charsmap="ALQCAACEAAAAAACAAQAAgMz8AgC4BQAAhyIAgMzkAgC4PQAAeyIAgMzsAgC4BQAAiyIAgMw8AADNvAAAmwkAgJ4JAIChCQCAgx0A..."), Replace(pattern=Regex(" {2,}"), content=" ")])
+pre_tokenizer: Metaspace(replacement="▁", prepend_scheme=always, split=True)
+post_processor: TemplateProcessing(single=[SpecialToken(id="<s>", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0)], pair=[SpecialToken(id="<s>", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0), SpecialToken(id="</s>", type_id=0), Sequence(id=B, type_id=0), ...], special_tokens={"</s>":SpecialToken(id="</s>", ids=[2], tokens=["</s>"]), "<s>":SpecialToken(id="<s>", ids=[0], tokens=["<s>"])})
Pattern #21 (12 models)
-normalizer: Sequence(normalizers=[Replace(pattern=Regex("\n"), content=" "), Replace(pattern=Regex(" {2,}"), content=" ")])
+normalizer: Sequence(normalizers=[Precompiled(precompiled_charsmap="ALQCAACEAAAAAACAAQAAgMz8AgC4BQAAhyIAgMzkAgC4PQAAeyIAgMzsAgC4BQAAiyIAgMw8AADNvAAAmwkAgJ4JAIChCQCAgx0A..."), Replace(pattern=Regex(" {2,}"), content=" ")])
Pattern #22 (10 models)
-normalizer: Sequence(normalizers=[Strip(strip_left=False, strip_right=True), Replace(pattern=String(" {2,}"), content="▁")])
+normalizer: Precompiled(precompiled_charsmap="ALQCAACEAAAAAACAAQAAgMz8AgC4BQAAhyIAgMzkAgC4PQAAeyIAgMzsAgC4BQAAiyIAgMw8AADNvAAAmwkAgJ4JAIChCQCAgx0A...")
-post_processor: TemplateProcessing(single=[SpecialToken(id="<s>", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0)], pair=[SpecialToken(id="<s>", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0), Sequence(id=B, type_id=0), SpecialToken(id="</s>", type_id=0)], special_tokens={"</s>":SpecialToken(id="</s>", ids=[2], tokens=["</s>"]), "<s>":SpecialToken(id="<s>", ids=[0], tokens=["<s>"])})
+post_processor: TemplateProcessing(single=[SpecialToken(id="<s>", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0)], pair=[SpecialToken(id="<s>", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0), SpecialToken(id="</s>", type_id=0), Sequence(id=B, type_id=0), ...], special_tokens={"</s>":SpecialToken(id="</s>", ids=[2], tokens=["</s>"]), "<s>":SpecialToken(id="<s>", ids=[0], tokens=["<s>"])})
Pattern #23 (8 models)
-normalizer: Sequence(normalizers=[Replace(pattern=Regex("\n"), content=" "), Replace(pattern=Regex(" {2,}"), content=" ")])
-pre_tokenizer: Metaspace(replacement="▁", prepend_scheme=always, split=True)
+normalizer: Sequence(normalizers=[Precompiled(precompiled_charsmap="ALQCAACEAAAAAACAAQAAgMz8AgC4BQAAhyIAgMzkAgC4PQAAeyIAgMzsAgC4BQAAiyIAgMw8AADNvAAAmwkAgJ4JAIChCQCAgx0A..."), Strip(strip_left=False, strip_right=True), Replace(pattern=Regex(" {2,}"), content="▁")])
+pre_tokenizer: Sequence(pretokenizers=[WhitespaceSplit(), Metaspace(replacement="▁", prepend_scheme=always, split=True)])
Pattern #24 (8 models)
-normalizer: Sequence(normalizers=[Strip(strip_left=False, strip_right=True), Replace(pattern=Regex(" {2,}"), content="▁")])
+normalizer: Sequence(normalizers=[Precompiled(precompiled_charsmap="ALQCAACEAAAAAACAAQAAgMz8AgC4BQAAjSIAgMzkAgC4PQAAgSIAgMzsAgC4BQAAkSIAgMw8AADNvAAAngkAgKEJAICkCQCAgx0A..."), Replace(pattern=Regex(" {2,}"), content=" ")])
Pattern #25 (8 models)
-pre_tokenizer: Sequence(pretokenizers=[Split(pattern=String("SPL1T-TH1S-Pl3A5E"), behavior=Removed, invert=False), Digits(individual_digits=True), Split(pattern=String("[\(\)\[\]\{\}]|([!\"#\$%\&'\*\+,\-\./:;<=>\?\\\^_`\|\~])\1*"), behavior=Isolated, invert=False), Split(pattern=String("
+pre_tokenizer: Sequence(pretokenizers=[Split(pattern=String("SPL1T-TH1S-Pl3A5E"), behavior=Removed, invert=False), Digits(individual_digits=True), Split(pattern=Regex("[\(\)\[\]\{\}]|([!"\#\$%\&'\*\+,\-\./:;<=>\?\\\^_`\|\~])\1*"), behavior=Isolated, invert=False), Split(pattern=String("
Pattern #26 (8 models)
-pre_tokenizer: Sequence(pretokenizers=[Split(pattern=String("SPL1T-TH1S-Pl3A5E"), behavior=Removed, invert=False), Digits(individual_digits=True), Split(pattern=String("[\(\)\[\]\{\}]|([!\"#\$%\&'\*\+,\-\./:;<=>\?\\\^_`\|\~])\1*"), behavior=Isolated, invert=False), Split(pattern=String("
+pre_tokenizer: Sequence(pretokenizers=[Split(pattern=String("SPL1T-TH1S-Pl3A5E"), behavior=Removed, invert=False), Digits(individual_digits=True), Split(pattern=Regex("[\(\)\[\]\{\}]|([!"\#\$%\&'\*\+,\-\./:;<=>\?\\\^_`\|\~])\1*"), behavior=Isolated, invert=False), Split(pattern=String("
-truncation: {'max_length': 4096, 'stride': 0, 'strategy': 'longest_first', 'direction': 'right'}
+truncation: {'max_length': 3584, 'stride': 0, 'strategy': 'longest_first', 'direction': 'right'}
Pattern #27 (7 models)
-pre_tokenizer: Metaspace(replacement="▁", prepend_scheme=first, split=False)
+pre_tokenizer: None
-decoder: Sequence(decoders=[Replace(pattern=String("▁"), content=" "), ByteFallback(), Fuse(), Strip(content=" ", start=1, stop=0)])
+decoder: None
Pattern #28 (7 models)
-normalizer: Sequence(normalizers=[Strip(strip_left=False, strip_right=True), Replace(pattern=Regex(" {2,}"), content="▁")])
+normalizer: Sequence(normalizers=[Precompiled(precompiled_charsmap="ALQCAACEAAAAAACAAQAAgMz8AgC4BQAAhyIAgMzkAgC4PQAAeyIAgMzsAgC4BQAAiyIAgMw8AADNvAAAmwkAgJ4JAIChCQCAgx0A..."), Replace(pattern=Regex(" {2,}"), content=" ")])
Pattern #29 (7 models)
-AddedToken("<mask>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True)
+AddedToken("<mask>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
-pre_tokenizer: Split(pattern=String(" "), behavior=MergedWithPrevious, invert=False)
+pre_tokenizer: None
Pattern #30 (6 models)
-pre_tokenizer: Metaspace(replacement="▁", prepend_scheme=first, split=False)
+pre_tokenizer: Sequence(pretokenizers=[ByteLevel(add_prefix_space=False, trim_offsets=True, use_regex=True), Metaspace(replacement="▁", prepend_scheme=first, split=False)])
Pattern #31 (6 models)
-AddedToken("<mask>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True)
+AddedToken("<mask>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
+AddedToken("▁▁", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
+AddedToken("▁▁▁", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
+AddedToken("▁▁▁▁", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
+AddedToken("▁▁▁▁▁", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
+AddedToken("▁▁▁▁▁▁", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
+AddedToken("▁▁▁▁▁▁▁", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
+AddedToken("▁▁▁▁▁▁▁▁", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
+AddedToken("▁▁▁▁▁▁▁▁▁", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
+AddedToken("▁▁▁▁▁▁▁▁▁▁", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
+AddedToken("▁▁▁▁▁▁▁▁▁▁▁", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
+AddedToken("▁▁▁▁▁▁▁▁▁▁▁▁", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
+AddedToken("▁▁▁▁▁▁▁▁▁▁▁▁▁", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
+AddedToken("▁▁▁▁▁▁▁▁▁▁▁▁▁▁", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
+AddedToken("▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
+AddedToken("▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
+AddedToken("▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
+AddedToken("▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
+AddedToken("▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
+AddedToken("▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
+AddedToken("▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
+AddedToken("▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
+AddedToken("▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
+AddedToken("▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
+AddedToken("▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
+AddedToken("▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
+AddedToken("▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
+AddedToken("▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
+AddedToken("▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
+AddedToken("▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
+AddedToken("▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
-pre_tokenizer: Split(pattern=String(" "), behavior=MergedWithPrevious, invert=False)
+pre_tokenizer: None
Pattern #32 (6 models)
-model: AddedToken("<pad>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True)
-AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True)
+model: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True)
+AddedToken("<pad>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True)
-normalizer: Sequence(normalizers=[Replace(pattern=Regex("[\n\r\t]"), content=" "), NFKC(), Strip(strip_left=False, strip_right=True), Replace(pattern=Regex(" +▁"), content="▁"), Replace(pattern=Regex("^▁+$"), content=""), ...])
-pre_tokenizer: Metaspace(replacement="▁", prepend_scheme=first, split=True)
-post_processor: TemplateProcessing(single=[SpecialToken(id="</s>", type_id=0), SpecialToken(id="__fra__", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0)], pair=[SpecialToken(id="</s>", type_id=0), SpecialToken(id="__fra__", type_id=0), Sequence(id=A, type_id=0), Sequence(id=B, type_id=0), SpecialToken(id="</s>", type_id=0)], special_tokens={"</s>":SpecialToken(id="</s>", ids=[3], tokens=["</s>"]), "__fra__":SpecialToken(id="__fra__", ids=[256026], tokens=["__fra__"])})
-decoder: Metaspace(replacement="▁", prepend_scheme=first, split=True)
+normalizer: None
+pre_tokenizer: None
+post_processor: TemplateProcessing(single=[Sequence(id=A, type_id=0)], pair=[Sequence(id=A, type_id=0), Sequence(id=B, type_id=1)], special_tokens={})
+decoder: None
Pattern #33 (5 models)
-post_processor: TemplateProcessing(single=[SpecialToken(id="[CLS]", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="[SEP]", type_id=0)], pair=[SpecialToken(id="[CLS]", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="[SEP]", type_id=0), SpecialToken(id="[SEP]", type_id=0), Sequence(id=B, type_id=0), ...], special_tokens={"[CLS]":SpecialToken(id="[CLS]", ids=[1], tokens=["[CLS]"]), "[SEP]":SpecialToken(id="[SEP]", ids=[2], tokens=["[SEP]"])})
+post_processor: TemplateProcessing(single=[SpecialToken(id="[CLS]", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="[SEP]", type_id=0)], pair=[SpecialToken(id="[CLS]", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="[SEP]", type_id=0), Sequence(id=B, type_id=0), SpecialToken(id="[SEP]", type_id=0)], special_tokens={"[CLS]":SpecialToken(id="[CLS]", ids=[1], tokens=["[CLS]"]), "[SEP]":SpecialToken(id="[SEP]", ids=[2], tokens=["[SEP]"])})
Pattern #34 (5 models)
-pre_tokenizer: Split(pattern=String(" "), behavior=MergedWithPrevious, invert=False)
+pre_tokenizer: None
Pattern #35 (5 models)
-normalizer: Sequence(normalizers=[Replace(pattern=Regex("[\n\r\t]"), content=" "), NFKC(), Strip(strip_left=False, strip_right=True), Replace(pattern=Regex(" +▁"), content="▁"), Replace(pattern=Regex("^▁+$"), content=""), ...])
+normalizer: Sequence(normalizers=[Precompiled(precompiled_charsmap="ALQCAACEAAAAAACAAQAAgMz8AgC4BQAAhyIAgMzkAgC4PQAAeyIAgMzsAgC4BQAAiyIAgMw8AADNvAAAmwkAgJ4JAIChCQCAgx0A..."), Strip(strip_left=False, strip_right=True), Replace(pattern=Regex(" {2,}"), content="▁")])
Pattern #36 (5 models)
-normalizer: Sequence(normalizers=[Replace(pattern=Regex("[\n\r\t]"), content=" "), NFKC(), Strip(strip_left=False, strip_right=True), Replace(pattern=Regex(" +▁"), content="▁"), Replace(pattern=Regex("^▁+$"), content=""), ...])
+normalizer: Sequence(normalizers=[Precompiled(precompiled_charsmap="ALQCAACEAAAAAACAAQAAgMz8AgC4BQAAjSIAgMzkAgC4PQAAgSIAgMzsAgC4BQAAkSIAgMw8AADNvAAAngkAgKEJAICkCQCAgx0A..."), Strip(strip_left=False, strip_right=True), Replace(pattern=Regex(" {2,}"), content="▁")])
Pattern #37 (4 models)
-pre_tokenizer: ByteLevel(add_prefix_space=True, trim_offsets=True, use_regex=True)
-post_processor: RobertaProcessing(sep=("</s>", 2), cls=("<s>", 0), trim_offsets=True, add_prefix_space=True)
+pre_tokenizer: ByteLevel(add_prefix_space=False, trim_offsets=True, use_regex=True)
+post_processor: RobertaProcessing(sep=("</s>", 2), cls=("<s>", 0), trim_offsets=True, add_prefix_space=False)
Pattern #38 (3 models)
+AddedToken("<|fim_prefix|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True)
+AddedToken("<|fim_middle|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True)
+AddedToken("<|fim_suffix|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True)
+AddedToken("<|endofprompt|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True)
-pre_tokenizer: ByteLevel(add_prefix_space=False, trim_offsets=True, use_regex=True)
+pre_tokenizer: Sequence(pretokenizers=[Split(pattern=Regex("(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]..."), behavior=Removed, invert=True), ByteLevel(add_prefix_space=False, trim_offsets=True, use_regex=False)])
Pattern #39 (3 models)
-post_processor: RobertaProcessing(sep=("<sep>", 50265), cls=("<s>", 0), trim_offsets=True, add_prefix_space=False)
+post_processor: RobertaProcessing(sep=("</s>", 2), cls=("<s>", 0), trim_offsets=True, add_prefix_space=False)
Pattern #40 (3 models)
-normalizer: Sequence(normalizers=[Replace(pattern=Regex("[\n\r\t]"), content=" "), NFKC(), Replace(pattern=Regex(" {2,}"), content=" ")])
+normalizer: Sequence(normalizers=[Precompiled(precompiled_charsmap="ALQCAACEAAAAAACAAQAAgMz8AgC4BQAAhyIAgMzkAgC4PQAAeyIAgMzsAgC4BQAAiyIAgMw8AADNvAAAmwkAgJ4JAIChCQCAgx0A..."), Replace(pattern=Regex(" {2,}"), content=" ")])
Pattern #41 (3 models)
-normalizer: None
-pre_tokenizer: ByteLevel(add_prefix_space=False, trim_offsets=True, use_regex=True)
+normalizer: NFKC()
+pre_tokenizer: ByteLevel(add_prefix_space=True, trim_offsets=True, use_regex=True)
Pattern #42 (3 models)
-post_processor: TemplateProcessing(single=[SpecialToken(id="</s>", type_id=0), SpecialToken(id="__fra__", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0)], pair=[SpecialToken(id="</s>", type_id=0), SpecialToken(id="__fra__", type_id=0), Sequence(id=A, type_id=0), Sequence(id=B, type_id=0), SpecialToken(id="</s>", type_id=0)], special_tokens={"</s>":SpecialToken(id="</s>", ids=[3], tokens=["</s>"]), "__fra__":SpecialToken(id="__fra__", ids=[256026], tokens=["__fra__"])})
+post_processor: TemplateProcessing(single=[SpecialToken(id="__eng__", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0)], pair=[SpecialToken(id="__eng__", type_id=0), Sequence(id=A, type_id=0), Sequence(id=B, type_id=0), SpecialToken(id="</s>", type_id=0)], special_tokens={"</s>":SpecialToken(id="</s>", ids=[3], tokens=["</s>"]), "__eng__":SpecialToken(id="__eng__", ids=[256022], tokens=["__eng__"])})
Pattern #43 (3 models)
-normalizer: Sequence(normalizers=[Replace(pattern=Regex("[\n\r\t]"), content=" "), NFKC(), Strip(strip_left=False, strip_right=True), Replace(pattern=Regex(" +▁"), content="▁"), Replace(pattern=Regex("^▁+$"), content=""), ...])
+normalizer: Sequence(normalizers=[Precompiled(precompiled_charsmap="ALQCAACEAAAAAACAAQAAgMz8AgC4BQAAjSIAgMzkAgC4PQAAgSIAgMzsAgC4BQAAkSIAgMw8AADNvAAAngkAgKEJAICkCQCAgx0A..."), Strip(strip_left=False, strip_right=True), Replace(pattern=Regex(" {2,}"), content="▁")])
-post_processor: TemplateProcessing(single=[SpecialToken(id="</s>", type_id=0), SpecialToken(id="__fra__", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0)], pair=[SpecialToken(id="</s>", type_id=0), SpecialToken(id="__fra__", type_id=0), Sequence(id=A, type_id=0), Sequence(id=B, type_id=0), SpecialToken(id="</s>", type_id=0)], special_tokens={"</s>":SpecialToken(id="</s>", ids=[3], tokens=["</s>"]), "__fra__":SpecialToken(id="__fra__", ids=[256026], tokens=["__fra__"])})
+post_processor: TemplateProcessing(single=[SpecialToken(id="__eng__", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0)], pair=[SpecialToken(id="__eng__", type_id=0), Sequence(id=A, type_id=0), Sequence(id=B, type_id=0), SpecialToken(id="</s>", type_id=0)], special_tokens={"</s>":SpecialToken(id="</s>", ids=[3], tokens=["</s>"]), "__eng__":SpecialToken(id="__eng__", ids=[256022], tokens=["__eng__"])})
Pattern #44 (2 models)
-normalizer: BertNormalizer(clean_text=True, handle_chinese_chars=True, strip_accents=False, lowercase=False)
-pre_tokenizer: BertPreTokenizer()
-post_processor: TemplateProcessing(single=[SpecialToken(id="[CLS]", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="[SEP]", type_id=0)], pair=[SpecialToken(id="[CLS]", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="[SEP]", type_id=0), Sequence(id=B, type_id=1), SpecialToken(id="[SEP]", type_id=1)], special_tokens={"[CLS]":SpecialToken(id="[CLS]", ids=[2], tokens=["[CLS]"]), "[SEP]":SpecialToken(id="[SEP]", ids=[3], tokens=["[SEP]"])})
-decoder: WordPiece(prefix="##", cleanup=True)
+normalizer: Sequence(normalizers=[Precompiled(precompiled_charsmap="ALQCAACEAAAAAACAAQAAgMz8AgC4BQAAjSIAgMzkAgC4PQAAgSIAgMzsAgC4BQAAkSIAgMw8AADNvAAAngkAgKEJAICkCQCAgx0A..."), Replace(pattern=Regex(" {2,}"), content=" ")])
+pre_tokenizer: Sequence(pretokenizers=[BertPreTokenizer(), Metaspace(replacement="▁", prepend_scheme=always, split=True)])
+post_processor: BertProcessing(sep=("[SEP]", 3), cls=("[CLS]", 2))
+decoder: Metaspace(replacement="▁", prepend_scheme=always, split=True)
Pattern #45 (2 models)
-pre_tokenizer: Metaspace(replacement="▁", prepend_scheme=first, split=False)
+pre_tokenizer: Metaspace(replacement="▁", prepend_scheme=never, split=False)
Pattern #46 (2 models)
-pre_tokenizer: Metaspace(replacement="▁", prepend_scheme=always, split=False)
+pre_tokenizer: None
-decoder: Sequence(decoders=[Replace(pattern=String("▁"), content=" "), ByteFallback(), Fuse(), Strip(content=" ", start=1, stop=0)])
+decoder: None
Pattern #47 (2 models)
-normalizer: Sequence(normalizers=[Strip(strip_left=False, strip_right=True), Replace(pattern=Regex(" {2,}"), content="▁")])
-pre_tokenizer: Metaspace(replacement="▁", prepend_scheme=always, split=True)
+normalizer: None
+pre_tokenizer: Sequence(pretokenizers=[WhitespaceSplit(), Metaspace(replacement="▁", prepend_scheme=always, split=True)])
Pattern #48 (2 models)
-normalizer: BertNormalizer(clean_text=True, handle_chinese_chars=True, strip_accents=None, lowercase=True)
-pre_tokenizer: BertPreTokenizer()
-post_processor: TemplateProcessing(single=[SpecialToken(id="[CLS]", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="[SEP]", type_id=0)], pair=[SpecialToken(id="[CLS]", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="[SEP]", type_id=0), Sequence(id=B, type_id=1), SpecialToken(id="[SEP]", type_id=1)], special_tokens={"[CLS]":SpecialToken(id="[CLS]", ids=[0], tokens=["[CLS]"]), "[SEP]":SpecialToken(id="[SEP]", ids=[2], tokens=["[SEP]"])})
-decoder: WordPiece(prefix="##", cleanup=True)
+normalizer: Sequence(normalizers=[NFKC(), Lowercase()])
+pre_tokenizer: ByteLevel(add_prefix_space=True, trim_offsets=True, use_regex=True)
+post_processor: BertProcessing(sep=("[SEP]", 2), cls=("[CLS]", 0))
+decoder: ByteLevel(add_prefix_space=True, trim_offsets=True, use_regex=True)
Pattern #49 (2 models)
-pre_tokenizer: ByteLevel(add_prefix_space=True, trim_offsets=True, use_regex=True)
-post_processor: RobertaProcessing(sep=("</s>", 36745), cls=("<s>", 36744), trim_offsets=True, add_prefix_space=True)
-decoder: ByteLevel(add_prefix_space=True, trim_offsets=True, use_regex=True)
+pre_tokenizer: Whitespace()
+post_processor: TemplateProcessing(single=[Sequence(id=A, type_id=0)], pair=[Sequence(id=A, type_id=0), Sequence(id=B, type_id=1)], special_tokens={})
+decoder: None
Pattern #50 (2 models)
-normalizer: None
+normalizer: Lowercase()
Pattern #51 (2 models)
-AddedToken("<mask>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True)
-pre_tokenizer: Split(pattern=String(" "), behavior=MergedWithPrevious, invert=False)
+pre_tokenizer: None
Pattern #52 (2 models)
-AddedToken("<mask>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True)
+AddedToken("<mask>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False)
-normalizer: Replace(pattern=String(" "), content="▁")
-pre_tokenizer: Split(pattern=String(" "), behavior=MergedWithPrevious, invert=False)
+normalizer: None
+pre_tokenizer: None
-decoder: Sequence(decoders=[Replace(pattern=String("▁"), content=" "), ByteFallback(), Fuse()])
+decoder: None
Pattern #53 (1 model)
-normalizer: Sequence(normalizers=[Strip(strip_left=False, strip_right=True), Replace(pattern=Regex(" {2,}"), content="▁")])
+normalizer: Sequence(normalizers=[Precompiled(precompiled_charsmap="ALQCAACEAAAAAACAAQAAgMz8AgC4BQAAhyIAgMzkAgC4PQAAeyIAgMzsAgC4BQAAiyIAgMw8AADNvAAAmwkAgJ4JAIChCQCAgx0A..."), Strip(strip_left=False, strip_right=True), Replace(pattern=Regex(" {2,}"), content="▁")])
Pattern #54 (1 model)
-AddedToken("<SEP>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True)
-AddedToken("<CLS>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True)
-normalizer: NFC()
-pre_tokenizer: Sequence(pretokenizers=[Digits(individual_digits=True), ByteLevel(add_prefix_space=False, trim_offsets=True, use_regex=True)])
+normalizer: None
+pre_tokenizer: Sequence(pretokenizers=[Split(pattern=Regex("\d{1,3}(?=(?:\d{3})*\b)"), behavior=Isolated, invert=False), Split(pattern=Regex("[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]*[\p{Ll}\p{Lm}\p{Lo}\p{M}]+(?i:'s|'t|'re|'ve|'m|'ll..."), behavior=Isolated, invert=False), ByteLevel(add_prefix_space=False, trim_offsets=True, use_regex=False)])
Pattern #55 (1 model)
-normalizer: Sequence(normalizers=[Strip(strip_left=False, strip_right=False), Replace(pattern=Regex(" {2,}"), content="▁")])
+normalizer: Sequence(normalizers=[Strip(strip_left=False, strip_right=True), Replace(pattern=Regex(" {2,}"), content="▁")])
Pattern #56 (1 model)
-normalizer: None
-pre_tokenizer: Metaspace(replacement="▁", prepend_scheme=never, split=False)
+normalizer: Sequence(normalizers=[Replace(pattern=String(" "), content="▁")])
+pre_tokenizer: None
Pattern #57 (1 model)
-normalizer: Sequence(normalizers=[Strip(strip_left=False, strip_right=False), Replace(pattern=Regex(" {2,}"), content="▁")])
+normalizer: Sequence(normalizers=[Precompiled(precompiled_charsmap="ALQCAACEAAAAAACAAQAAgMz8AgC4BQAAjSIAgMzkAgC4PQAAgSIAgMzsAgC4BQAAkSIAgMw8AADNvAAAngkAgKEJAICkCQCAgx0A..."), Replace(pattern=Regex(" {2,}"), content=" ")])
Pattern #58 (1 model)
-normalizer: Sequence(normalizers=[Replace(pattern=Regex("\s{2,}|[\n\r\t]"), content=" "), NFC(), Strip(strip_left=False, strip_right=True)])
+normalizer: Sequence(normalizers=[Strip(strip_left=True, strip_right=True), Precompiled(precompiled_charsmap="ALQCAACEAAAAAACAAQAAgMz8AgC4BQAAhyIAgMzkAgC4PQAAeyIAgMzsAgC4BQAAiyIAgMw8AADNvAAAmwkAgJ4JAIChCQCAgx0A..."), Replace(pattern=Regex(" {2,}"), content=" ")])
Pattern #59 (1 model)
-post_processor: TemplateProcessing(single=[SpecialToken(id="<s>", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0)], pair=[SpecialToken(id="<s>", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0), Sequence(id=B, type_id=0), SpecialToken(id="</s>", type_id=0)], special_tokens={"</s>":SpecialToken(id="</s>", ids=[2], tokens=["</s>"]), "<s>":SpecialToken(id="<s>", ids=[0], tokens=["<s>"])})
+post_processor: RobertaProcessing(sep=("</s>", 2), cls=("<s>", 0), trim_offsets=True, add_prefix_space=False)
Pattern #60 (1 model)
-normalizer: Sequence(normalizers=[Strip(strip_left=False, strip_right=True), Replace(pattern=Regex(" {2,}"), content="▁")])
-pre_tokenizer: Metaspace(replacement="▁", prepend_scheme=always, split=True)
+normalizer: Precompiled(precompiled_charsmap="ALQCAACEAAAAAACAAQAAgMz8AgC4BQAAhyIAgMzkAgC4PQAAeyIAgMzsAgC4BQAAiyIAgMw8AADNvAAAmwkAgJ4JAIChCQCAgx0A...")
+pre_tokenizer: Sequence(pretokenizers=[WhitespaceSplit(), Metaspace(replacement="▁", prepend_scheme=always, split=True)])
Pattern #61 (1 model)
-normalizer: Sequence(normalizers=[Replace(pattern=Regex("[\n\r\t]"), content=" "), NFKC(), Strip(strip_left=False, strip_right=True), Replace(pattern=Regex(" {2,}"), content="▁")])
+normalizer: Sequence(normalizers=[Precompiled(precompiled_charsmap="ALQCAACEAAAAAACAAQAAgMz8AgC4BQAAhyIAgMzkAgC4PQAAeyIAgMzsAgC4BQAAiyIAgMw8AADNvAAAmwkAgJ4JAIChCQCAgx0A..."), Strip(strip_left=False, strip_right=True), Replace(pattern=Regex(" {2,}"), content="▁")])
Pattern #62 (1 model)
-post_processor: TemplateProcessing(single=[SpecialToken(id="[CLS]", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="[SEP]", type_id=0)], pair=[SpecialToken(id="[CLS]", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="[SEP]", type_id=0), Sequence(id=B, type_id=1), SpecialToken(id="[SEP]", type_id=1)], special_tokens={"[CLS]":SpecialToken(id="[CLS]", ids=[1], tokens=["[CLS]"]), "[SEP]":SpecialToken(id="[SEP]", ids=[2], tokens=["[SEP]"])})
+post_processor: BertProcessing(sep=("[SEP]", 2), cls=("[CLS]", 1))
Pattern #63 (1 model)
-normalizer: None
-pre_tokenizer: ByteLevel(add_prefix_space=False, trim_offsets=True, use_regex=True)
-post_processor: RobertaProcessing(sep=("</s>", 2), cls=("<s>", 0), trim_offsets=True, add_prefix_space=False)
-decoder: ByteLevel(add_prefix_space=True, trim_offsets=True, use_regex=True)
+normalizer: Precompiled(precompiled_charsmap="ALQCAACEAAAAAACAAQAAgMz8AgC4BQAAhyIAgMzkAgC4PQAAeyIAgMzsAgC4BQAAiyIAgMw8AADNvAAAmwkAgJ4JAIChCQCAgx0A...")
+pre_tokenizer: Sequence(pretokenizers=[WhitespaceSplit(), Metaspace(replacement="▁", prepend_scheme=always, split=True)])
+post_processor: TemplateProcessing(single=[SpecialToken(id="<s>", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0)], pair=[SpecialToken(id="<s>", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0), SpecialToken(id="</s>", type_id=0), Sequence(id=B, type_id=0), ...], special_tokens={"</s>":SpecialToken(id="</s>", ids=[2], tokens=["</s>"]), "<s>":SpecialToken(id="<s>", ids=[0], tokens=["<s>"])})
+decoder: Metaspace(replacement="▁", prepend_scheme=always, split=True)
Pattern #64 (1 model)
-post_processor: RobertaProcessing(sep=("</s>", 25905), cls=("<s>", 25904), trim_offsets=True, add_prefix_space=False)
+post_processor: ByteLevel(add_prefix_space=True, trim_offsets=True, use_regex=True)
Pattern #65 (1 model)
-normalizer: NFC()
+normalizer: None
Pattern #66 (1 model)
-normalizer: Sequence(normalizers=[Replace(pattern=Regex("[\n\r\t]"), content=" "), NFKC(), Replace(pattern=Regex(" {2,}"), content=" ")])
+normalizer: Sequence(normalizers=[Precompiled(precompiled_charsmap="ALQCAACEAAAAAACAAQAAgMz8AgC4BQAAhyIAgMzkAgC4PQAAeyIAgMzsAgC4BQAAiyIAgMw8AADNvAAAmwkAgJ4JAIChCQCAgx0A..."), Replace(pattern=Regex(" {2,}"), content=" ")])
-post_processor: TemplateProcessing(single=[SpecialToken(id="eng_Latn", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0)], pair=[SpecialToken(id="eng_Latn", type_id=0), Sequence(id=A, type_id=0), Sequence(id=B, type_id=0), SpecialToken(id="</s>", type_id=0)], special_tokens={"</s>":SpecialToken(id="</s>", ids=[2], tokens=["</s>"]), "eng_Latn":SpecialToken(id="eng_Latn", ids=[256047], tokens=["eng_Latn"])})
+post_processor: TemplateProcessing(single=[Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0), SpecialToken(id="eng_Latn", type_id=0)], pair=[Sequence(id=A, type_id=0), Sequence(id=B, type_id=0), SpecialToken(id="</s>", type_id=0), SpecialToken(id="eng_Latn", type_id=0)], special_tokens={"</s>":SpecialToken(id="</s>", ids=[2], tokens=["</s>"]), "eng_Latn":SpecialToken(id="eng_Latn", ids=[256047], tokens=["eng_Latn"])})
Pattern #67 (1 model)
-normalizer: BertNormalizer(clean_text=True, handle_chinese_chars=True, strip_accents=None, lowercase=True)
+normalizer: Sequence(normalizers=[NFKD(), StripAccents(), Lowercase()])
-post_processor: TemplateProcessing(single=[SpecialToken(id="[CLS]", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="[SEP]", type_id=0)], pair=[SpecialToken(id="[CLS]", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="[SEP]", type_id=0), Sequence(id=B, type_id=1), SpecialToken(id="[SEP]", type_id=1)], special_tokens={"[CLS]":SpecialToken(id="[CLS]", ids=[2], tokens=["[CLS]"]), "[SEP]":SpecialToken(id="[SEP]", ids=[3], tokens=["[SEP]"])})
+post_processor: TemplateProcessing(single=[Sequence(id=A, type_id=0)], pair=[Sequence(id=A, type_id=0), Sequence(id=B, type_id=1)], special_tokens={})
Pattern #68 (1 model)
-normalizer: BertNormalizer(clean_text=True, handle_chinese_chars=True, strip_accents=False, lowercase=False)
+normalizer: BertNormalizer(clean_text=True, handle_chinese_chars=True, strip_accents=False, lowercase=True)
Pattern #69 (1 model)
-AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True)
-AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True)
-normalizer: None
-pre_tokenizer: ByteLevel(add_prefix_space=False, trim_offsets=True, use_regex=True)
-post_processor: RobertaProcessing(sep=("[SEP]", 102), cls=("[CLS]", 101), trim_offsets=True, add_prefix_space=False)
-decoder: ByteLevel(add_prefix_space=True, trim_offsets=True, use_regex=True)
+normalizer: BertNormalizer(clean_text=True, handle_chinese_chars=True, strip_accents=None, lowercase=True)
+pre_tokenizer: BertPreTokenizer()
+post_processor: TemplateProcessing(single=[SpecialToken(id="[CLS]", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="[SEP]", type_id=0)], pair=[SpecialToken(id="[CLS]", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="[SEP]", type_id=0), Sequence(id=B, type_id=1), SpecialToken(id="[SEP]", type_id=1)], special_tokens={"[CLS]":SpecialToken(id="[CLS]", ids=[101], tokens=["[CLS]"]), "[SEP]":SpecialToken(id="[SEP]", ids=[102], tokens=["[SEP]"])})
+decoder: WordPiece(prefix="##", cleanup=True)
Pattern #70 (1 model)
-post_processor: RobertaProcessing(sep=("</s>", 2), cls=("<s>", 0), trim_offsets=True, add_prefix_space=False)
+post_processor: ByteLevel(add_prefix_space=True, trim_offsets=True, use_regex=True)
Pattern #71 (1 model)
-AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True)
-AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True)
-normalizer: None
-pre_tokenizer: ByteLevel(add_prefix_space=False, trim_offsets=True, use_regex=True)
-post_processor: RobertaProcessing(sep=("[SEP]", 3), cls=("[CLS]", 2), trim_offsets=True, add_prefix_space=False)
-decoder: ByteLevel(add_prefix_space=True, trim_offsets=True, use_regex=True)
+normalizer: BertNormalizer(clean_text=True, handle_chinese_chars=True, strip_accents=False, lowercase=True)
+pre_tokenizer: BertPreTokenizer()
+post_processor: TemplateProcessing(single=[SpecialToken(id="[CLS]", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="[SEP]", type_id=0)], pair=[SpecialToken(id="[CLS]", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="[SEP]", type_id=0), Sequence(id=B, type_id=1), SpecialToken(id="[SEP]", type_id=1)], special_tokens={"[CLS]":SpecialToken(id="[CLS]", ids=[2], tokens=["[CLS]"]), "[SEP]":SpecialToken(id="[SEP]", ids=[3], tokens=["[SEP]"])})
+decoder: WordPiece(prefix="##", cleanup=True)
Pattern #72 (1 model)
-normalizer: None
-pre_tokenizer: ByteLevel(add_prefix_space=False, trim_offsets=True, use_regex=True)
-post_processor: RobertaProcessing(sep=("[SEP]", 2), cls=("[CLS]", 0), trim_offsets=True, add_prefix_space=False)
-decoder: ByteLevel(add_prefix_space=True, trim_offsets=True, use_regex=True)
+normalizer: BertNormalizer(clean_text=True, handle_chinese_chars=True, strip_accents=None, lowercase=False)
+pre_tokenizer: BertPreTokenizer()
+post_processor: TemplateProcessing(single=[SpecialToken(id="[CLS]", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="[SEP]", type_id=0)], pair=[SpecialToken(id="[CLS]", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="[SEP]", type_id=0), Sequence(id=B, type_id=0), SpecialToken(id="[SEP]", type_id=0)], special_tokens={"[CLS]":SpecialToken(id="[CLS]", ids=[0], tokens=["[CLS]"]), "[SEP]":SpecialToken(id="[SEP]", ids=[2], tokens=["[SEP]"])})
+decoder: WordPiece(prefix="##", cleanup=True)
Pattern #73 (1 model)
-normalizer: Sequence(normalizers=[NFC(), Replace(pattern=Regex("\s+"), content=" "), Lowercase()])
-pre_tokenizer: Sequence(pretokenizers=[Split(pattern=Regex("<\|startoftext\|>|<\|endoftext\|>|'s|'t|'re|'ve|'m|'ll|'d|[\p{L}]+|[\p{N}]|[^\s\p{L}\p{N}]+"), behavior=Removed, invert=True), ByteLevel(add_prefix_space=False, trim_offsets=True, use_regex=True)])
-post_processor: RobertaProcessing(sep=("<|endoftext|>", 1), cls=("<|startoftext|>", 0), trim_offsets=False, add_prefix_space=False)
+normalizer: None
+pre_tokenizer: ByteLevel(add_prefix_space=False, trim_offsets=True, use_regex=True)
+post_processor: ByteLevel(add_prefix_space=True, trim_offsets=False, use_regex=True)
Pattern #74 (1 model)
-pre_tokenizer: Sequence(pretokenizers=[Split(pattern=Regex("(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?[\p{L}\p{M}]+|\p{N}| ?[^\s\p{L}\p{M}\p{N}]+[\r\n]*|\s..."), behavior=Isolated, invert=False), ByteLevel(add_prefix_space=False, trim_offsets=True, use_regex=False)])
+pre_tokenizer: Sequence(pretokenizers=[Split(pattern=Regex("(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?[\p{L}\p{M}]+|\p{N}| ?[^\s\p{L}\p{M}\p{N}]+[\r\n]*|\s..."), behavior=Isolated, invert=False), ByteLevel(add_prefix_space=False, trim_offsets=False, use_regex=False)])
-decoder: ByteLevel(add_prefix_space=True, trim_offsets=True, use_regex=True)
+decoder: ByteLevel(add_prefix_space=False, trim_offsets=False, use_regex=False)
Pattern #75 (1 model)
-post_processor: TemplateProcessing(single=[SpecialToken(id="<s>", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0)], pair=[SpecialToken(id="<s>", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0), SpecialToken(id="</s>", type_id=0), Sequence(id=B, type_id=1), ...], special_tokens={"</s>":SpecialToken(id="</s>", ids=[2], tokens=["</s>"]), "<s>":SpecialToken(id="<s>", ids=[0], tokens=["<s>"])})
+post_processor: RobertaProcessing(sep=("</s>", 2), cls=("<s>", 0), trim_offsets=True, add_prefix_space=False)
Pattern #76 (1 model)
-normalizer: Sequence(normalizers=[Replace(pattern=Regex("[\n\r\t]"), content=" "), NFKC(), Strip(strip_left=False, strip_right=True), Replace(pattern=Regex(" +▁"), content="▁"), Replace(pattern=Regex("^▁+$"), content=""), ...])
+normalizer: Sequence(normalizers=[Precompiled(precompiled_charsmap="ALQCAACEAAAAAACAAQAAgMz8AgC4BQAAhyIAgMzkAgC4PQAAeyIAgMzsAgC4BQAAiyIAgMw8AADNvAAAmwkAgJ4JAIChCQCAgx0A..."), Strip(strip_left=False, strip_right=True), Replace(pattern=Regex(" {2,}"), content="▁")])
-post_processor: TemplateProcessing(single=[SpecialToken(id="</s>", type_id=0), SpecialToken(id="__fra__", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0)], pair=[SpecialToken(id="</s>", type_id=0), SpecialToken(id="__fra__", type_id=0), Sequence(id=A, type_id=0), Sequence(id=B, type_id=0), SpecialToken(id="</s>", type_id=0)], special_tokens={"</s>":SpecialToken(id="</s>", ids=[3], tokens=["</s>"]), "__fra__":SpecialToken(id="__fra__", ids=[256057], tokens=["__fra__"])})
+post_processor: TemplateProcessing(single=[SpecialToken(id="__dan__", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0)], pair=[SpecialToken(id="__dan__", type_id=0), Sequence(id=A, type_id=0), Sequence(id=B, type_id=0), SpecialToken(id="</s>", type_id=0)], special_tokens={"</s>":SpecialToken(id="</s>", ids=[3], tokens=["</s>"]), "__dan__":SpecialToken(id="__dan__", ids=[256041], tokens=["__dan__"])})
Pattern #77 (1 model)
-normalizer: Sequence(normalizers=[Replace(pattern=Regex("[\n\r\t]"), content=" "), NFKC(), Strip(strip_left=False, strip_right=True), Replace(pattern=Regex(" +▁"), content="▁"), Replace(pattern=Regex("^▁+$"), content=""), ...])
+normalizer: Sequence(normalizers=[Precompiled(precompiled_charsmap="ALQCAACEAAAAAACAAQAAgMz8AgC4BQAAhyIAgMzkAgC4PQAAeyIAgMzsAgC4BQAAiyIAgMw8AADNvAAAmwkAgJ4JAIChCQCAgx0A..."), Strip(strip_left=False, strip_right=True), Replace(pattern=Regex(" {2,}"), content="▁")])
-post_processor: TemplateProcessing(single=[SpecialToken(id="</s>", type_id=0), SpecialToken(id="__fra__", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0)], pair=[SpecialToken(id="</s>", type_id=0), SpecialToken(id="__fra__", type_id=0), Sequence(id=A, type_id=0), Sequence(id=B, type_id=0), SpecialToken(id="</s>", type_id=0)], special_tokens={"</s>":SpecialToken(id="</s>", ids=[3], tokens=["</s>"]), "__fra__":SpecialToken(id="__fra__", ids=[256057], tokens=["__fra__"])})
+post_processor: TemplateProcessing(single=[SpecialToken(id="__eng__", type_id=0), Sequence(id=A, type_id=0), SpecialToken(id="</s>", type_id=0)], pair=[SpecialToken(id="__eng__", type_id=0), Sequence(id=A, type_id=0), Sequence(id=B, type_id=0), SpecialToken(id="</s>", type_id=0)], special_tokens={"</s>":SpecialToken(id="</s>", ids=[3], tokens=["</s>"]), "__eng__":SpecialToken(id="__eng__", ids=[256047], tokens=["__eng__"])})
Pattern #78 (1 model)
-normalizer: Sequence(normalizers=[Replace(pattern=Regex("[\n\r\t]"), content=" "), NFKC(), Strip(strip_left=False, strip_right=True), Replace(pattern=Regex(" +▁"), content="▁"), Replace(pattern=Regex("^▁+$"), content=""), ...])
+normalizer: Sequence(normalizers=[Precompiled(precompiled_charsmap="ALQCAACEAAAAAACAAQAAgMz8AgC4BQAAjSIAgMzkAgC4PQAAgSIAgMzsAgC4BQAAkSIAgMw8AADNvAAAngkAgKEJAICkCQCAgx0A..."), Strip(strip_left=False, strip_right=True), Replace(pattern=Regex(" {2,}"), content="▁")])
-decoder: Metaspace(replacement="▁", prepend_scheme=first, split=True)
+decoder: Metaspace(replacement="▁", prepend_scheme=always, split=True)