Skip to content

Problems with reading XML from buffering reader #469

@aegoroff

Description

@aegoroff

Here is an example

fn main() { // Reproduction: parse a small XML document through a tiny BufReader and print the <title> text.
    use quick_xml::events::Event;
    use quick_xml::reader::Reader;
    use std::io::BufReader;
    // Sample document: contains non-Latin1 (Cyrillic) text and CDATA sections.
    let xml = r###"<torrents>
    <torrent id="2" registred_at="2009.08.19 00:13:00" size="3">
    <title>Заголовок</title>
    <torrent hash="7096DB05CC2612B30079B26C94823DD8CA2A8156" tracker_id="3"/>
    <forum id="1"><![CDATA[<fo>]]></forum>
    <del/>
    <content><![CDATA[
        some content
    ]]></content>
    </torrent>
    </torrents>"###;
    // The 32-byte capacity is far smaller than the document, so the input arrives in many small chunks.
    let br = BufReader::with_capacity(32, xml.as_bytes());
    let mut reader = Reader::from_reader(br);
    reader.trim_text(true);
    // `buf` is reused for every event and cleared at the end of each loop iteration.
    let mut buf = Vec::new();
    let mut read_title = false; // set when <title> starts; consumed by the next text event
    loop {
        match reader.read_event_into(&mut buf) {
            Ok(Event::Start(e)) if e.name().as_ref() == b"title" => {
                read_title = true;
            }
            Ok(Event::Text(e)) => {
                if read_title {
                    read_title = false;
                    let escaped = &e.into_inner();
                    let text = reader.decoder().decode(escaped).unwrap_or_default(); // falls back to the default value if decoding fails
                    let title = String::from(text);
                    print!("{title}");
                }
            }
            Ok(Event::Eof) => break, // exits the loop when reaching end of file
            Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e), // any reader error aborts the run
            _ => (), // There are several other `Event`s we do not consider here
        }
        buf.clear();
    }
}

When you run it, it crashes with something like this:

    Finished dev [unoptimized + debuginfo] target(s) in 0.00s
     Running `target/debug/qxerr`
thread 'main' panicked at 'Error at position 299: EndEventMismatch { expected: "forum", found: "content" }', src/main.rs:39:23
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
Заголовок%         

If the buffer capacity is increased to 512 (see `let br = BufReader::with_capacity(32, xml.as_bytes());`), the problem goes away. Replacing the non-Latin1 characters with Latin1 ones also solves the problem.

ADDITIONAL INFO

This was not a problem before v0.23; v0.22 parsed such files correctly.

Metadata

Metadata

Assignees

No one assigned

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions