Skip to content

Commit 04f9914

Browse files
committed
Add an option to decide whether the abbreviation generator is allowed to fully contract the fragment to a single label.
1 parent b82b436 commit 04f9914

File tree

2 files changed

+71
-10
lines changed

2 files changed

+71
-10
lines changed

app/depict/src/main/java/org/openscience/cdk/depict/Abbreviations.java

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ public class Abbreviations implements Iterable<String> {
132132

133133
private final SmilesParser smipar = new SmilesParser(SilentChemObjectBuilder.getInstance());
134134
private boolean contractOnHetero = true;
135+
private boolean contractSingleFragments = false;
135136

136137
public Abbreviations() {
137138
}
@@ -179,6 +180,10 @@ public void setContractOnHetero(boolean val) {
179180
this.contractOnHetero = val;
180181
}
181182

183+
/**
 * Set whether {@code generate} is allowed to fully contract the input to a
 * single abbreviation label. The backing flag is initialised to
 * {@code false}, so this contraction is off unless enabled here.
 *
 * @param val true to allow contraction to a single label, false to forbid it
 */
public void setContractToSingleLabel(boolean val) {
184+
this.contractSingleFragments = val;
185+
}
186+
182187
private static Set<IBond> findCutBonds(IAtomContainer mol, EdgeToBondMap bmap, int[][] adjlist) {
183188
Set<IBond> cuts = new HashSet<>();
184189
int numAtoms = mol.getAtomCount();
@@ -334,14 +339,16 @@ public List<Sgroup> generate(final IAtomContainer mol) {
334339
usedAtoms.addAll(sgroup.getAtoms());
335340
}
336341

342+
final List<Sgroup> newSgroups = new ArrayList<>();
343+
337344
// disconnected abbreviations, salts, common reagents, large compounds
338345
if (usedAtoms.isEmpty()) {
339346
try {
340347
IAtomContainer copy = AtomContainerManipulator.copyAndSuppressedHydrogens(mol);
341348
String cansmi = usmigen.create(copy);
342349
String label = disconnectedAbbreviations.get(cansmi);
343350

344-
if (label != null && !disabled.contains(label)) {
351+
if (label != null && !disabled.contains(label) && contractSingleFragments) {
345352
Sgroup sgroup = new Sgroup();
346353
sgroup.setType(SgroupType.CtabAbbreviation);
347354
sgroup.setSubscript(label);
@@ -351,7 +358,8 @@ public List<Sgroup> generate(final IAtomContainer mol) {
351358
} else if (cansmi.contains(".")) {
352359
IAtomContainerSet parts = ConnectivityChecker.partitionIntoMolecules(mol);
353360

354-
// partiton in two two parts
361+
362+
// leave one out
355363
Sgroup best = null;
356364
for (int i = 0; i < parts.getAtomContainerCount(); i++) {
357365
IAtomContainer a = parts.getAtomContainer(i);
@@ -361,7 +369,7 @@ public List<Sgroup> generate(final IAtomContainer mol) {
361369
b.add(parts.getAtomContainer(j));
362370
Sgroup sgroup1 = getAbbr(a);
363371
Sgroup sgroup2 = getAbbr(b);
364-
if (sgroup1 != null && sgroup2 != null) {
372+
if (sgroup1 != null && sgroup2 != null && contractSingleFragments) {
365373
Sgroup combined = new Sgroup();
366374
label = null;
367375
for (IAtom atom : sgroup1.getAtoms())
@@ -375,21 +383,22 @@ public List<Sgroup> generate(final IAtomContainer mol) {
375383
combined.setType(SgroupType.CtabAbbreviation);
376384
return Collections.singletonList(combined);
377385
}
378-
if (sgroup1 != null && (best == null || sgroup1.getAtoms().size() > best.getAtoms().size()))
386+
if (sgroup1 != null && (best == null || sgroup1.getAtoms().size() < best.getAtoms().size()))
379387
best = sgroup1;
380-
if (sgroup2 != null && (best == null || sgroup2.getAtoms().size() > best.getAtoms().size()))
388+
if (sgroup2 != null && (best == null || sgroup2.getAtoms().size() < best.getAtoms().size()))
381389
best = sgroup2;
382390
}
383391

384-
if (best != null)
385-
return Collections.singletonList(best);
392+
if (best != null) {
393+
newSgroups.add(best);
394+
usedAtoms.addAll(best.getAtoms());
395+
}
386396
}
387397

388398
} catch (CDKException ignored) {
389399
}
390400
}
391401

392-
final List<Sgroup> newSgroups = new ArrayList<>();
393402
List<IAtomContainer> fragments = generateFragments(mol);
394403
Multimap<IAtom, Sgroup> sgroupAdjs = ArrayListMultimap.create();
395404

app/depict/src/test/java/org/openscience/cdk/depict/AbbreviationsTest.java

Lines changed: 54 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,9 @@
2323

2424
package org.openscience.cdk.depict;
2525

26-
import org.junit.Ignore;
26+
import org.hamcrest.CoreMatchers;
2727
import org.junit.Test;
2828
import org.openscience.cdk.CDKConstants;
29-
import org.openscience.cdk.exception.InvalidSmilesException;
3029
import org.openscience.cdk.interfaces.IAtomContainer;
3130
import org.openscience.cdk.sgroup.Sgroup;
3231
import org.openscience.cdk.sgroup.SgroupType;
@@ -85,6 +84,59 @@ public void phenylShouldNotMatchBenzene() throws Exception {
8584
assertThat(sgroups.size(), is(0));
8685
}
8786

87+
/**
 * A two-component input (an aromatic component plus a separate
 * trifluoroacetic acid component) with CF3, CO2H and TFA abbreviations
 * registered. Exactly one Sgroup is expected and it must carry the
 * whole-component label "TFA" rather than the CF3/CO2H group labels.
 */
@Test
88+
public void TFASaltDisconnected() throws Exception {
89+
Abbreviations factory = new Abbreviations();
90+
IAtomContainer mol = smi("c1ccccc1c1ccccc1.FC(F)(F)C(=O)O");
91+
factory.add("*C(F)(F)F CF3");
92+
factory.add("*C(=O)O CO2H");
93+
factory.add("FC(F)(F)C(=O)O TFA");
94+
List<Sgroup> sgroups = factory.generate(mol);
95+
assertThat(sgroups.size(), is(1));
96+
assertThat(sgroups.get(0).getSubscript(), is("TFA"));
97+
}
98+
99+
/**
 * The input is trifluoroacetic acid on its own and the factory is left at
 * its default settings (setContractToSingleLabel is never called). The
 * molecule must not be collapsed to the single "TFA" label: two Sgroups
 * are expected, labelled "CF3" and "CO2H" in either order.
 */
@Test
100+
public void TFASaltConnected() throws Exception {
101+
Abbreviations factory = new Abbreviations();
102+
IAtomContainer mol = smi("FC(F)(F)C(=O)O");
103+
factory.add("*C(F)(F)F CF3");
104+
factory.add("*C(=O)O CO2H");
105+
factory.add("FC(F)(F)C(=O)O TFA");
106+
List<Sgroup> sgroups = factory.generate(mol);
107+
assertThat(sgroups.size(), is(2));
108+
assertThat(sgroups.get(0).getSubscript(),
109+
CoreMatchers.anyOf(is("CF3"), is("CO2H")));
110+
assertThat(sgroups.get(1).getSubscript(),
111+
CoreMatchers.anyOf(is("CF3"), is("CO2H")));
// the two labels must differ, i.e. one of each group is present
112+
assertThat(sgroups.get(1).getSubscript(),
113+
CoreMatchers.not(is(sgroups.get(0).getSubscript())));
114+
}
115+
116+
/**
 * Two disconnected components, each with a registered whole-molecule
 * abbreviation (DCM and TFA). With contraction to a single label enabled,
 * the whole input is expected to become one Sgroup labelled "TFA·DCM".
 */
@Test
117+
public void DcmAndTfa() throws Exception {
118+
Abbreviations factory = new Abbreviations();
119+
IAtomContainer mol = smi("ClCCl.FC(F)(F)C(=O)O");
120+
factory.add("ClCCl DCM");
121+
factory.add("FC(F)(F)C(=O)O TFA");
122+
factory.setContractToSingleLabel(true);
123+
List<Sgroup> sgroups = factory.generate(mol);
124+
assertThat(sgroups.size(), is(1));
125+
assertThat(sgroups.get(0).getSubscript(), is("TFA·DCM"));
126+
}
127+
128+
/**
 * Same input and abbreviations as the DCM/TFA single-label case, but with
 * contraction to a single label explicitly disabled. Only one Sgroup is
 * expected and it must be labelled "DCM", leaving the TFA component
 * unabbreviated.
 * NOTE(review): presumably DCM is chosen because generate() prefers the
 * component abbreviation with fewer atoms; confirm against generate().
 */
@Test
129+
public void DcmAndTfaNoSingleFrag() throws Exception {
130+
Abbreviations factory = new Abbreviations();
131+
IAtomContainer mol = smi("ClCCl.FC(F)(F)C(=O)O");
132+
factory.add("ClCCl DCM");
133+
factory.add("FC(F)(F)C(=O)O TFA");
134+
factory.setContractToSingleLabel(false);
135+
List<Sgroup> sgroups = factory.generate(mol);
136+
assertThat(sgroups.size(), is(1));
137+
assertThat(sgroups.get(0).getSubscript(), is("DCM"));
138+
}
139+
88140
@Test
89141
public void avoidOverZealousAbbreviations() throws Exception {
90142
Abbreviations factory = new Abbreviations();

0 commit comments

Comments
 (0)