Skip to content

Commit b4df4a6

Browse files
committed
Simpler and more robust RXN file parsing. We store lines in a buffer and process the buffer every time we see a new $MOL line. All records are stored in an intermediate "components" buffer that is then divvied up into reactants, products, and agents at the end.
1 parent d6cad8b commit b4df4a6

File tree

1 file changed

+42
-80
lines changed

1 file changed

+42
-80
lines changed

storage/ctab/src/main/java/org/openscience/cdk/io/MDLRXNV2000Reader.java

Lines changed: 42 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,9 @@
4545
import java.io.InputStreamReader;
4646
import java.io.Reader;
4747
import java.io.StringReader;
48+
import java.util.ArrayList;
4849
import java.util.Iterator;
50+
import java.util.List;
4951
import java.util.StringTokenizer;
5052

5153
/**
@@ -198,118 +200,72 @@ private IReaction readReaction(IChemObjectBuilder builder) throws CDKException {
198200
throw new CDKException("Error while reading header of RXN file", exception);
199201
}
200202

201-
int reactantCount = 0;
202-
int productCount = 0;
203+
int numReactans = 0;
204+
int numProducts = 0;
203205
int agentCount = 0;
204206
try {
205207
String countsLine = input.readLine();
206208
/*
207209
* this line contains the number of reactants and products
208210
*/
209211
StringTokenizer tokenizer = new StringTokenizer(countsLine);
210-
reactantCount = Integer.valueOf(tokenizer.nextToken());
211-
logger.info("Expecting " + reactantCount + " reactants in file");
212-
productCount = Integer.valueOf(tokenizer.nextToken());
212+
numReactans = Integer.valueOf(tokenizer.nextToken());
213+
logger.info("Expecting " + numReactans + " reactants in file");
214+
numProducts = Integer.valueOf(tokenizer.nextToken());
213215
if (tokenizer.hasMoreTokens()) {
214216
agentCount = Integer.valueOf(tokenizer.nextToken());
215217
// ChemAxon extension, technically BIOVIA now support this but
216218
// not documented yet
217219
if (mode == Mode.STRICT && agentCount > 0)
218220
throw new CDKException("RXN files uses agent count extension");
219221
}
220-
logger.info("Expecting " + productCount + " products in file");
222+
logger.info("Expecting " + numProducts + " products in file");
221223
} catch (IOException | NumberFormatException exception) {
222224
logger.debug(exception);
223225
throw new CDKException("Error while counts line of RXN file", exception);
224226
}
225227

226-
// now read the reactants
228+
// now read the molecules
227229
try {
228-
for (int i = 1; i <= reactantCount; i++) {
229-
StringBuffer molFile = new StringBuffer();
230-
String molFileLine = "";
231-
while (!input.readLine().startsWith("$MOL")) {
232-
// skip
233-
}
234-
do {
235-
molFileLine = input.readLine();
236-
molFile.append(molFileLine);
237-
molFile.append('\n');
238-
} while (!molFileLine.equals("M END"));
239-
240-
// read MDL molfile content
241-
// Changed this to mdlv2000 reader
242-
MDLV2000Reader reader = new MDLV2000Reader(new StringReader(molFile.toString()), super.mode);
243-
IAtomContainer reactant = (IAtomContainer) reader.read(builder.newInstance(IAtomContainer.class));
244-
reader.close();
245-
246-
// add reactant
247-
reaction.addReactant(reactant);
230+
String line = input.readLine();
231+
if (line == null || !line.startsWith("$MOL")) {
232+
throw new CDKException("Expected $MOL to start, was" + line);
248233
}
249-
} catch (CDKException exception) {
250-
// rethrow exception from MDLReader
251-
throw exception;
252-
} catch (IOException | IllegalArgumentException exception) {
253-
logger.debug(exception);
254-
throw new CDKException("Error while reading reactant", exception);
255-
}
256234

257-
// now read the products
258-
try {
259-
for (int i = 1; i <= productCount; i++) {
260-
StringBuffer molFile = new StringBuffer();
261-
while (!input.readLine().startsWith("$MOL")) {
262-
// skip
263-
}
264-
String molFileLine = "";
265-
do {
266-
molFileLine = input.readLine();
267-
molFile.append(molFileLine);
268-
molFile.append('\n');
269-
} while (!molFileLine.equals("M END"));
270-
271-
// read MDL molfile content
272-
MDLV2000Reader reader = new MDLV2000Reader(new StringReader(molFile.toString()));
273-
IAtomContainer product = (IAtomContainer) reader.read(builder.newInstance(IAtomContainer.class));
274-
reader.close();
235+
List<IAtomContainer> components = new ArrayList<>();
275236

276-
// add reactant
277-
reaction.addProduct(product);
237+
StringBuilder sb = new StringBuilder();
238+
while ((line = input.readLine()) != null) {
239+
if (line.startsWith("$MOL")) {
240+
processMol(builder.newAtomContainer(), components, sb);
241+
sb.setLength(0);
242+
} else {
243+
sb.append(line).append('\n');
244+
}
278245
}
279-
} catch (CDKException exception) {
280-
// rethrow exception from MDLReader
281-
throw exception;
282-
} catch (IOException | IllegalArgumentException exception) {
283-
logger.debug(exception);
284-
throw new CDKException("Error while reading products", exception);
285-
}
286-
287-
// now read the products
288-
try {
289-
for (int i = 1; i <= agentCount; i++) {
290-
StringBuffer molFile = new StringBuffer();
291-
input.readLine(); // String announceMDLFileLine =
292-
String molFileLine = "";
293-
do {
294-
molFileLine = input.readLine();
295-
molFile.append(molFileLine);
296-
molFile.append('\n');
297-
} while (!molFileLine.equals("M END"));
298246

299-
// read MDL molfile content
300-
MDLV2000Reader reader = new MDLV2000Reader(new StringReader(molFile.toString()));
301-
IAtomContainer product = (IAtomContainer) reader.read(builder.newInstance(IAtomContainer.class));
302-
reader.close();
247+
// last record
248+
if (sb.length() > 0)
249+
processMol(builder.newAtomContainer(), components, sb);
303250

304-
// add reactant
305-
reaction.addAgent(product);
251+
for (IAtomContainer component : components.subList(0, numReactans)) {
252+
reaction.addReactant(component);
253+
}
254+
for (IAtomContainer component : components.subList(numReactans,
255+
numReactans+numProducts)) {
256+
reaction.addProduct(component);
306257
}
258+
for (IAtomContainer component : components.subList(numReactans+numProducts,
259+
components.size())) {
260+
reaction.addAgent(component);
261+
}
262+
307263
} catch (CDKException exception) {
308264
// rethrow exception from MDLReader
309265
throw exception;
310266
} catch (IOException | IllegalArgumentException exception) {
311267
logger.debug(exception);
312-
throw new CDKException("Error while reading products", exception);
268+
throw new CDKException("Error while reading reactant", exception);
313269
}
314270

315271
// now try to map things, if wanted
@@ -348,6 +304,12 @@ private IReaction readReaction(IChemObjectBuilder builder) throws CDKException {
348304
return reaction;
349305
}
350306

307+
private void processMol(IAtomContainer mol, List<IAtomContainer> components, StringBuilder sb) throws CDKException, IOException {
308+
MDLV2000Reader reader = new MDLV2000Reader(new StringReader(sb.toString()), super.mode);
309+
components.add(reader.read(mol));
310+
reader.close();
311+
}
312+
351313
@Override
352314
public void close() throws IOException {
353315
input.close();

0 commit comments

Comments
 (0)