Skip to content

Commit d106548

Browse files
committed
Do not include an "R" in the formula for attachment points/leaving groups.
1 parent b02a79e commit d106548

File tree

2 files changed

+59
-23
lines changed

2 files changed

+59
-23
lines changed

base/test-standard/src/test/java/org/openscience/cdk/tools/manipulator/AtomContainerManipulatorTest.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1394,4 +1394,20 @@ static void assertRemoveH(String smiIn, String smiExp) throws Exception {
13941394
assertThat(AtomContainerManipulator.getMass(mol, MostAbundant),
13951395
closeTo(4731.154, 0.001));
13961396
}
1397+
1398+
// can't put these tests in cdk-formula since we can't access SMILES and it's a bit verbose
1399+
// to construct the molecules as needed
1400+
@Test public void getFormulaMultiattach() throws InvalidSmilesException {
1401+
SmilesParser smipar = new SmilesParser(SilentChemObjectBuilder.getInstance());
1402+
IAtomContainer mol = smipar.parseSmiles("[Ru]([P](CCC1=CC=CC=C1)(C2CCCCC2)C3CCCCC3)(Cl)(Cl)*.C1(=CC=C(C=C1)C(C)C)C |m:24:25.26.27.28.29.30|");
1403+
String mf = MolecularFormulaManipulator.getString(MolecularFormulaManipulator.getMolecularFormula(mol));
1404+
assertThat(mf, CoreMatchers.is("C30H45Cl2PRu"));
1405+
}
1406+
1407+
@Test public void getFormulaAttach() throws InvalidSmilesException {
1408+
SmilesParser smipar = new SmilesParser(SilentChemObjectBuilder.getInstance());
1409+
IAtomContainer mol = smipar.parseSmiles("*c1cc(*)ccc1 |$_AP1;;;;R;$|");
1410+
String mf = MolecularFormulaManipulator.getString(MolecularFormulaManipulator.getMolecularFormula(mol));
1411+
assertThat(mf, CoreMatchers.is("C6H5R"));
1412+
}
13971413
}

tool/formula/src/main/java/org/openscience/cdk/tools/manipulator/MolecularFormulaManipulator.java

Lines changed: 43 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -23,30 +23,21 @@
2323
* */
2424
package org.openscience.cdk.tools.manipulator;
2525

26+
import org.openscience.cdk.CDK;
2627
import org.openscience.cdk.CDKConstants;
2728
import org.openscience.cdk.config.AtomTypeFactory;
2829
import org.openscience.cdk.config.Elements;
2930
import org.openscience.cdk.config.IsotopeFactory;
3031
import org.openscience.cdk.config.Isotopes;
3132
import org.openscience.cdk.exception.CDKException;
32-
import org.openscience.cdk.interfaces.IAtom;
33-
import org.openscience.cdk.interfaces.IAtomContainer;
34-
import org.openscience.cdk.interfaces.IAtomType;
35-
import org.openscience.cdk.interfaces.IChemObjectBuilder;
36-
import org.openscience.cdk.interfaces.IElement;
37-
import org.openscience.cdk.interfaces.IIsotope;
38-
import org.openscience.cdk.interfaces.IMolecularFormula;
33+
import org.openscience.cdk.interfaces.*;
34+
import org.openscience.cdk.sgroup.Sgroup;
35+
import org.openscience.cdk.sgroup.SgroupType;
3936
import org.openscience.cdk.tools.LoggingToolFactory;
4037

4138
import java.io.IOException;
42-
import java.util.ArrayList;
43-
import java.util.Arrays;
44-
import java.util.Collections;
45-
import java.util.Comparator;
46-
import java.util.List;
47-
import java.util.Map;
48-
import java.util.Objects;
49-
import java.util.TreeMap;
39+
import java.util.*;
40+
5041
/**
5142
* Class with convenience methods to manipulate
5243
* {@link IMolecularFormula}'s. For example:
@@ -245,13 +236,16 @@ public static String getString(IMolecularFormula formula, String[] orderElements
245236
}
246237

247238
private static void appendElement(StringBuilder sb, Integer mass, int elem, int count) {
239+
String symbol = Elements.ofNumber(elem).symbol();
240+
if (symbol.isEmpty())
241+
symbol = "R";
248242
if (mass != null)
249243
sb.append('[')
250244
.append(mass)
251245
.append(']')
252-
.append(Elements.ofNumber(elem).symbol());
246+
.append(symbol);
253247
else
254-
sb.append(Elements.ofNumber(elem).symbol());
248+
sb.append(symbol);
255249
if (count != 0)
256250
sb.append(count);
257251
}
@@ -1092,14 +1086,40 @@ public static IMolecularFormula getMolecularFormula(IAtomContainer atomContainer
10921086
* @see #getMolecularFormula(IAtomContainer)
10931087
*/
10941088
public static IMolecularFormula getMolecularFormula(IAtomContainer atomContainer, IMolecularFormula formula) {
1089+
1090+
// mark multi-center attachments to be excluded from the formula
1091+
Set<IAtom> mattach = null;
1092+
List<Sgroup> sgroups = atomContainer.getProperty(CDKConstants.CTAB_SGROUPS);
1093+
if (sgroups != null) {
1094+
for (Sgroup sgroup : sgroups) {
1095+
if (sgroup.getType() == SgroupType.ExtMulticenter) {
1096+
for (IBond bond : sgroup.getBonds()) {
1097+
for (IAtom atom : sgroup.getAtoms()) {
1098+
if (bond.contains(atom)) {
1099+
if (mattach == null)
1100+
mattach = new HashSet<>();
1101+
mattach.add(atom);
1102+
}
1103+
}
1104+
}
1105+
}
1106+
}
1107+
}
1108+
if (mattach == null)
1109+
mattach = Collections.emptySet();
1110+
10951111
int charge = 0;
10961112
int hcnt = 0;
1097-
for (IAtom iAtom : atomContainer.atoms()) {
1098-
formula.addIsotope(iAtom);
1099-
if (iAtom.getFormalCharge() != null)
1100-
charge += iAtom.getFormalCharge();
1101-
if (iAtom.getImplicitHydrogenCount() != null)
1102-
hcnt += iAtom.getImplicitHydrogenCount();
1113+
for (IAtom atm : atomContainer.atoms()) {
1114+
if ((atm instanceof IPseudoAtom && ((IPseudoAtom) atm).getAttachPointNum() != 0))
1115+
continue;
1116+
if (mattach.contains(atm))
1117+
continue;
1118+
formula.addIsotope(atm);
1119+
if (atm.getFormalCharge() != null)
1120+
charge += atm.getFormalCharge();
1121+
if (atm.getImplicitHydrogenCount() != null)
1122+
hcnt += atm.getImplicitHydrogenCount();
11031123
}
11041124
if (hcnt != 0) {
11051125
IAtom hAtom = atomContainer.getBuilder().newInstance(IAtom.class, "H");

0 commit comments

Comments
 (0)