Skip to content

Commit 8975390

Browse files
committed
Simplify RXN V3000 reading and allow agents to be read.
1 parent faef4b1 commit 8975390

File tree

2 files changed

+180
-46
lines changed

2 files changed

+180
-46
lines changed

storage/ctab/src/main/java/org/openscience/cdk/io/MDLRXNV3000Reader.java

Lines changed: 51 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,10 @@
2424
import java.io.InputStreamReader;
2525
import java.io.Reader;
2626
import java.io.StringReader;
27+
import java.util.Locale;
2728
import java.util.StringTokenizer;
2829

30+
import org.openscience.cdk.ReactionRole;
2931
import org.openscience.cdk.exception.CDKException;
3032
import org.openscience.cdk.interfaces.IAtomContainer;
3133
import org.openscience.cdk.interfaces.IChemModel;
@@ -177,6 +179,7 @@ private IReaction readReaction(IChemObjectBuilder builder) throws CDKException {
177179

178180
int reactantCount = 0;
179181
int productCount = 0;
182+
int agentCount = 0;
180183
boolean foundCOUNTS = false;
181184
while (isReady() && !foundCOUNTS) {
182185
String command = readCommand();
@@ -188,6 +191,12 @@ private IReaction readReaction(IChemObjectBuilder builder) throws CDKException {
188191
logger.info("Expecting " + reactantCount + " reactants in file");
189192
productCount = Integer.valueOf(tokenizer.nextToken()).intValue();
190193
logger.info("Expecting " + productCount + " products in file");
194+
if (tokenizer.hasMoreTokens()) {
195+
agentCount = Integer.valueOf(tokenizer.nextToken()).intValue();
196+
logger.info("Expecting " + agentCount + " products in file");
197+
if (mode == Mode.STRICT && agentCount > 0)
198+
throw new CDKException("RXN files uses agent count extension");
199+
}
191200
} catch (Exception exception) {
192201
logger.debug(exception);
193202
throw new CDKException("Error while counts line of RXN file", exception);
@@ -198,71 +207,67 @@ private IReaction readReaction(IChemObjectBuilder builder) throws CDKException {
198207
}
199208
}
200209

201-
// now read the reactants
202-
for (int i = 1; i <= reactantCount; i++) {
203-
StringBuffer molFile = new StringBuffer();
204-
String announceMDLFileLine = readCommand();
205-
if (!announceMDLFileLine.equals("BEGIN REACTANT")) {
206-
String error = "Excepted start of reactant, but found: " + announceMDLFileLine;
207-
logger.error(error);
208-
throw new CDKException(error);
209-
}
210-
String molFileLine = "";
211-
while (!molFileLine.endsWith("END REACTANT")) {
212-
molFileLine = readLine();
213-
molFile.append(molFileLine);
214-
molFile.append('\n');
215-
};
210+
readMols(builder, reaction, ReactionRole.Reactant, reactantCount);
211+
readMols(builder, reaction, ReactionRole.Product, productCount);
212+
readMols(builder, reaction, ReactionRole.Agent, agentCount);
216213

217-
try {
218-
// read MDL molfile content
219-
MDLV3000Reader reader = new MDLV3000Reader(new StringReader(molFile.toString()), super.mode);
220-
IAtomContainer reactant = (IAtomContainer) reader.read(builder.newInstance(IAtomContainer.class));
221-
reader.close();
214+
return reaction;
215+
}
222216

223-
// add reactant
224-
reaction.addReactant(reactant);
225-
} catch (IllegalArgumentException | CDKException | IOException exception) {
226-
String error = "Error while reading reactant: " + exception.getMessage();
227-
logger.error(error);
228-
logger.debug(exception);
229-
throw new CDKException(error, exception);
230-
}
231-
}
217+
private void readMols(IChemObjectBuilder builder, IReaction reaction, ReactionRole role, int count) throws CDKException {
218+
if (count == 0)
219+
return;
220+
String command = readCommand();
221+
if (!command.equals("BEGIN " + role.name().toUpperCase(Locale.ROOT)))
222+
throw new CDKException("Expected start of " + role + "s but got: " + command);
223+
224+
StringBuilder molFile = new StringBuilder();
232225

233-
// now read the products
234-
for (int i = 1; i <= productCount; i++) {
235-
StringBuffer molFile = new StringBuffer();
236-
String announceMDLFileLine = readCommand();
237-
if (!announceMDLFileLine.equals("BEGIN PRODUCT")) {
238-
String error = "Excepted start of product, but found: " + announceMDLFileLine;
226+
// now read each CTAB block for the given role (reactant, product or agent)
227+
for (int i = 0; i < count; i++) {
228+
molFile.setLength(0);
229+
command = readCommand();
230+
if (!command.endsWith("BEGIN CTAB")) {
231+
String error = "Excepted start of " + role + " CTAB, but found: " + command;
239232
logger.error(error);
240233
throw new CDKException(error);
241234
}
242235
String molFileLine = "";
243-
while (!molFileLine.endsWith("END PRODUCT")) {
244-
molFileLine = readLine();
245-
molFile.append(molFileLine);
246-
molFile.append('\n');
247-
};
236+
while ((molFileLine = readLine()) != null) {
237+
molFile.append(molFileLine).append('\n');
238+
if (molFileLine.endsWith("END CTAB"))
239+
break;
240+
}
248241

249242
try {
250243
// read MDL molfile content
251-
MDLV3000Reader reader = new MDLV3000Reader(new StringReader(molFile.toString()));
252-
IAtomContainer product = (IAtomContainer) reader.read(builder.newInstance(IAtomContainer.class));
244+
MDLV3000Reader reader = new MDLV3000Reader(new StringReader(molFile.toString()), super.mode);
245+
IAtomContainer mol = reader.read(builder.newAtomContainer());
253246
reader.close();
254247

255-
// add product
256-
reaction.addProduct(product);
248+
switch (role) {
249+
case Reactant:
250+
reaction.addReactant(mol);
251+
break;
252+
case Agent:
253+
reaction.addAgent(mol);
254+
break;
255+
case Product:
256+
reaction.addProduct(mol);
257+
break;
258+
}
259+
257260
} catch (IllegalArgumentException | CDKException | IOException exception) {
258-
String error = "Error while reading product: " + exception.getMessage();
261+
String error = "Error while reading reactant: " + exception.getMessage();
259262
logger.error(error);
260263
logger.debug(exception);
261264
throw new CDKException(error, exception);
262265
}
263266
}
264267

265-
return reaction;
268+
command = readCommand();
269+
if (!command.equals("END " + role.name().toUpperCase(Locale.ROOT)))
270+
throw new CDKException("Expected end of " + role + "s but got: " + command);
266271
}
267272

268273
private boolean isReady() throws CDKException {

storage/ctab/src/test/java/org/openscience/cdk/io/MDLRXNV3000ReaderTest.java

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,21 @@
2323
*/
2424
package org.openscience.cdk.io;
2525

26+
import java.io.IOException;
2627
import java.io.InputStream;
28+
import java.io.StringReader;
2729

2830
import org.junit.Assert;
2931
import org.junit.BeforeClass;
3032
import org.junit.Test;
3133
import org.openscience.cdk.ChemModel;
3234
import org.openscience.cdk.Reaction;
35+
import org.openscience.cdk.exception.CDKException;
3336
import org.openscience.cdk.interfaces.IAtomContainer;
37+
import org.openscience.cdk.interfaces.IChemObjectBuilder;
3438
import org.openscience.cdk.interfaces.IReaction;
3539
import org.openscience.cdk.io.IChemObjectReader.Mode;
40+
import org.openscience.cdk.silent.SilentChemObjectBuilder;
3641
import org.openscience.cdk.tools.ILoggingTool;
3742
import org.openscience.cdk.tools.LoggingToolFactory;
3843

@@ -86,4 +91,128 @@ public void testReadReactions1() throws Exception {
8691

8792
}
8893

94+
@Test public void readAgents() throws IOException, CDKException {
95+
String rxnfile = "$RXN V3000\n" +
96+
"\n" +
97+
" Mrv1810 020601212219\n" +
98+
"\n" +
99+
"M V30 COUNTS 2 1 2\n" +
100+
"M V30 BEGIN REACTANT\n" +
101+
"M V30 BEGIN CTAB\n" +
102+
"M V30 COUNTS 9 9 0 0 0\n" +
103+
"M V30 BEGIN ATOM\n" +
104+
"M V30 1 C -24.3094 2.695 0 0\n" +
105+
"M V30 2 C -22.9758 1.925 0 0\n" +
106+
"M V30 3 C -21.6421 2.695 0 0\n" +
107+
"M V30 4 C -22.9758 0.385 0 0\n" +
108+
"M V30 5 C -21.6421 -0.385 0 0\n" +
109+
"M V30 6 C -21.6421 -1.925 0 0\n" +
110+
"M V30 7 C -22.9758 -2.695 0 0\n" +
111+
"M V30 8 C -24.3094 -1.925 0 0\n" +
112+
"M V30 9 C -24.3094 -0.385 0 0\n" +
113+
"M V30 END ATOM\n" +
114+
"M V30 BEGIN BOND\n" +
115+
"M V30 1 1 1 2\n" +
116+
"M V30 2 1 2 3\n" +
117+
"M V30 3 1 2 4\n" +
118+
"M V30 4 4 4 5\n" +
119+
"M V30 5 4 5 6\n" +
120+
"M V30 6 4 6 7\n" +
121+
"M V30 7 4 7 8\n" +
122+
"M V30 8 4 8 9\n" +
123+
"M V30 9 4 4 9\n" +
124+
"M V30 END BOND\n" +
125+
"M V30 END CTAB\n" +
126+
"M V30 BEGIN CTAB\n" +
127+
"M V30 COUNTS 7 6 0 0 0\n" +
128+
"M V30 BEGIN ATOM\n" +
129+
"M V30 1 C -14.341 0.1528 0 0\n" +
130+
"M V30 2 C -15.6747 -0.6172 0 0\n" +
131+
"M V30 3 C -17.0084 0.1528 0 0\n" +
132+
"M V30 4 O -18.3421 -0.6172 0 0\n" +
133+
"M V30 5 Cl -17.0084 1.6928 0 0\n" +
134+
"M V30 6 C -13.0074 -0.6172 0 0\n" +
135+
"M V30 7 Cl -11.6737 0.1528 0 0\n" +
136+
"M V30 END ATOM\n" +
137+
"M V30 BEGIN BOND\n" +
138+
"M V30 1 1 1 2\n" +
139+
"M V30 2 1 2 3\n" +
140+
"M V30 3 2 3 4\n" +
141+
"M V30 4 1 3 5\n" +
142+
"M V30 5 1 1 6\n" +
143+
"M V30 6 1 6 7\n" +
144+
"M V30 END BOND\n" +
145+
"M V30 END CTAB\n" +
146+
"M V30 END REACTANT\n" +
147+
"M V30 BEGIN PRODUCT\n" +
148+
"M V30 BEGIN CTAB\n" +
149+
"M V30 COUNTS 15 15 0 0 0\n" +
150+
"M V30 BEGIN ATOM\n" +
151+
"M V30 1 C 18.9747 -3.08 0 0\n" +
152+
"M V30 2 C 18.9747 -1.54 0 0\n" +
153+
"M V30 3 C 17.641 -0.77 0 0\n" +
154+
"M V30 4 C 20.3084 -0.77 0 0\n" +
155+
"M V30 5 C 20.3084 0.77 0 0\n" +
156+
"M V30 6 C 21.6421 1.54 0 0\n" +
157+
"M V30 7 C 22.9758 0.77 0 0\n" +
158+
"M V30 8 C 22.9758 -0.77 0 0\n" +
159+
"M V30 9 C 21.6421 -1.54 0 0\n" +
160+
"M V30 10 C 24.3094 1.54 0 0\n" +
161+
"M V30 11 O 24.3094 3.08 0 0\n" +
162+
"M V30 12 C 25.6431 0.77 0 0\n" +
163+
"M V30 13 C 26.9768 1.54 0 0\n" +
164+
"M V30 14 C 28.3105 0.77 0 0\n" +
165+
"M V30 15 Cl 29.6441 1.54 0 0\n" +
166+
"M V30 END ATOM\n" +
167+
"M V30 BEGIN BOND\n" +
168+
"M V30 1 1 1 2\n" +
169+
"M V30 2 1 2 3\n" +
170+
"M V30 3 1 2 4\n" +
171+
"M V30 4 4 4 5\n" +
172+
"M V30 5 4 5 6\n" +
173+
"M V30 6 4 6 7\n" +
174+
"M V30 7 4 7 8\n" +
175+
"M V30 8 4 8 9\n" +
176+
"M V30 9 4 4 9\n" +
177+
"M V30 10 1 7 10\n" +
178+
"M V30 11 2 10 11\n" +
179+
"M V30 12 1 10 12\n" +
180+
"M V30 13 1 12 13\n" +
181+
"M V30 14 1 13 14\n" +
182+
"M V30 15 1 14 15\n" +
183+
"M V30 END BOND\n" +
184+
"M V30 END CTAB\n" +
185+
"M V30 END PRODUCT\n" +
186+
"M V30 BEGIN AGENT\n" +
187+
"M V30 BEGIN CTAB\n" +
188+
"M V30 COUNTS 4 0 0 0 0\n" +
189+
"M V30 BEGIN ATOM\n" +
190+
"M V30 1 Al -3.135 3.3128 0 0 CHG=3\n" +
191+
"M V30 2 Cl -1.045 3.3128 0 0 CHG=-1\n" +
192+
"M V30 3 Cl 1.045 3.3128 0 0 CHG=-1\n" +
193+
"M V30 4 Cl 3.135 3.3128 0 0 CHG=-1\n" +
194+
"M V30 END ATOM\n" +
195+
"M V30 END CTAB\n" +
196+
"M V30 BEGIN CTAB\n" +
197+
"M V30 COUNTS 3 2 0 0 0\n" +
198+
"M V30 BEGIN ATOM\n" +
199+
"M V30 1 C 7.9887 2.7995 0 0\n" +
200+
"M V30 2 Cl 6.655 3.5695 0 0\n" +
201+
"M V30 3 Cl 9.3224 3.5695 0 0\n" +
202+
"M V30 END ATOM\n" +
203+
"M V30 BEGIN BOND\n" +
204+
"M V30 1 1 1 2\n" +
205+
"M V30 2 1 1 3\n" +
206+
"M V30 END BOND\n" +
207+
"M V30 END CTAB\n" +
208+
"M V30 END AGENT\n" +
209+
"M END\n";
210+
try (MDLRXNV3000Reader mdlr = new MDLRXNV3000Reader(new StringReader(rxnfile))) {
211+
IChemObjectBuilder bldr = SilentChemObjectBuilder.getInstance();
212+
IReaction reaction = mdlr.read(bldr.newInstance(IReaction.class));
213+
Assert.assertEquals(1, reaction.getReactantCount());
214+
Assert.assertEquals(1, reaction.getProductCount());
215+
Assert.assertEquals(2, reaction.getAgents().getAtomContainerCount());
216+
}
217+
}
89218
}

0 commit comments

Comments
 (0)