Skip to content

Commit f809e7a

Browse files
committed
Utility to get the most abundant MF (MAMF).
1 parent 7c5ec18 commit f809e7a

File tree

2 files changed

+126
-11
lines changed

2 files changed

+126
-11
lines changed

tool/formula/src/main/java/org/openscience/cdk/tools/manipulator/MolecularFormulaManipulator.java

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,14 @@
5858
*/
5959
public class MolecularFormulaManipulator {
6060

61+
public static final Comparator<IIsotope> NAT_ABUN_COMP = new Comparator<IIsotope>() {
62+
@Override
63+
public int compare(IIsotope o1, IIsotope o2) {
64+
return -Double.compare(o1.getNaturalAbundance(),
65+
o2.getNaturalAbundance());
66+
}
67+
};
68+
6169
/**
6270
* Checks a set of Nodes for the occurrence of each isotopes
6371
* instance in the molecular formula. In short number of atoms.
@@ -1411,4 +1419,89 @@ else if (proton != null &&
14111419

14121420
return true;
14131421
}
1422+
1423+
/**
1424+
* Helper method for adding isotope distributions to a MF. The method adds
1425+
* a distribution of isotopes by splitting the set of isotopes in two,
1426+
* the one under consideration (specified by 'idx') and the remaining to be
1427+
* considered ('&gt;idx'). The inflection point is calculate as 'k'
1428+
* &le 'count' isotopes added. If there are remaining isotopes the method
1429+
* calls it's self with 'idx+1' and 'count := k'.
1430+
*
1431+
* @param mf the molecular formula to update
1432+
* @param isotopes the isotopes, sorted most abundance to least
1433+
* @param idx which isotope we're currently considering
1434+
* @param count the number of isotopes remaining to select from
1435+
* @return the distribution is unique (or not)
1436+
*/
1437+
private static boolean addIsotopeDist(IMolecularFormula mf,
1438+
IIsotope[] isotopes,
1439+
int idx, int count) {
1440+
if (count == 0)
1441+
return true;
1442+
double frac = 100d;
1443+
for (int i = 0; i < idx; i++)
1444+
frac -= isotopes[i].getNaturalAbundance();
1445+
double p = isotopes[idx].getNaturalAbundance() / frac;
1446+
1447+
if (p >= 1.0) {
1448+
mf.addIsotope(isotopes[idx], count);
1449+
return true;
1450+
}
1451+
1452+
double kMin = (count + 1) * (1 - p) - 1;
1453+
double kMax = (count + 1) * (1 - p);
1454+
if ((int) Math.ceil(kMin) == (int) Math.floor(kMax)) {
1455+
int k = (int) kMax;
1456+
mf.addIsotope(isotopes[idx], count - k);
1457+
// recurse with remaining
1458+
return addIsotopeDist(mf, isotopes, idx + 1, k);
1459+
}
1460+
return false; // multiple are most abundant
1461+
}
1462+
1463+
/**
1464+
* Compute the most abundant MF. Given the MF C<sub>6</sub>Br<sub>6</sub>
1465+
* this function rapidly computes the most abundant MF as
1466+
* <sup>12</sup>C<sub>6</sub><sup>79</sup>Br<sub>3</sub><sup>81
1467+
* </sup>Br<sub>3</sub>.
1468+
*
1469+
* @param mf a molecular formula with unspecified isotopes
1470+
* @return the most abundant MF, or null if it could not be computed
1471+
*/
1472+
public static IMolecularFormula getMostAbundant(IMolecularFormula mf) {
1473+
final Isotopes isofact;
1474+
try {
1475+
isofact = Isotopes.getInstance();
1476+
} catch (IOException e) {
1477+
return null;
1478+
}
1479+
IMolecularFormula res = mf.getBuilder()
1480+
.newInstance(IMolecularFormula.class);
1481+
for (IIsotope iso : mf.isotopes()) {
1482+
int count = mf.getIsotopeCount(iso);
1483+
if (iso.getMassNumber() == null || iso.getMassNumber() == 0) {
1484+
IIsotope[] isotopes = isofact.getIsotopes(iso.getSymbol());
1485+
Arrays.sort(isotopes, NAT_ABUN_COMP);
1486+
if (!addIsotopeDist(res, isotopes, 0, count))
1487+
return null;
1488+
} else
1489+
res.addIsotope(iso, count);
1490+
}
1491+
return res;
1492+
}
1493+
1494+
/**
1495+
* Compute the most abundant MF. Given the a molecule
1496+
* C<sub>6</sub>Br<sub>6</sub> this function rapidly computes the most
1497+
* abundant MF as
1498+
* <sup>12</sup>C<sub>6</sub><sup>79</sup>Br<sub>3</sub><sup>81
1499+
* </sup>Br<sub>3</sub>.
1500+
*
1501+
* @param mol a molecule with unspecified isotopes
1502+
* @return the most abundant MF, or null if it could not be computed
1503+
*/
1504+
public static IMolecularFormula getMostAbundant(IAtomContainer mol) {
1505+
return getMostAbundant(getMolecularFormula(mol));
1506+
}
14141507
}

tool/formula/src/test/java/org/openscience/cdk/tools/manipulator/MolecularFormulaManipulatorTest.java

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,6 @@
1818
*/
1919
package org.openscience.cdk.tools.manipulator;
2020

21-
import static org.hamcrest.CoreMatchers.is;
22-
import static org.hamcrest.MatcherAssert.assertThat;
23-
import static org.hamcrest.number.IsCloseTo.closeTo;
24-
import static org.junit.Assert.assertFalse;
25-
import static org.junit.Assert.assertTrue;
26-
27-
import java.io.IOException;
28-
import java.io.InputStream;
29-
import java.util.Arrays;
30-
import java.util.List;
31-
3221
import org.junit.Assert;
3322
import org.junit.Test;
3423
import org.openscience.cdk.Atom;
@@ -53,6 +42,18 @@
5342
import org.openscience.cdk.templates.TestMoleculeFactory;
5443
import org.openscience.cdk.tools.CDKHydrogenAdder;
5544

45+
import java.io.IOException;
46+
import java.io.InputStream;
47+
import java.util.Arrays;
48+
import java.util.List;
49+
50+
import static org.hamcrest.CoreMatchers.is;
51+
import static org.hamcrest.MatcherAssert.assertThat;
52+
import static org.hamcrest.number.IsCloseTo.closeTo;
53+
import static org.junit.Assert.assertFalse;
54+
import static org.junit.Assert.assertTrue;
55+
import static org.openscience.cdk.tools.manipulator.AtomContainerManipulator.*;
56+
5657
/**
5758
* Checks the functionality of the MolecularFormulaManipulator.
5859
*
@@ -1375,4 +1376,25 @@ public void testRoundTripCharge() {
13751376
SilentChemObjectBuilder.getInstance());
13761377
assertThat(MolecularFormulaManipulator.getString(m), is("[C3H7]+"));
13771378
}
1379+
1380+
@Test
1381+
public void getMostAbundant() {
1382+
IMolecularFormula mf = new MolecularFormula();
1383+
mf.addIsotope(new Atom("C"), 6);
1384+
mf.addIsotope(new Atom("Br"), 6);
1385+
IMolecularFormula mamf = MolecularFormulaManipulator.getMostAbundant(mf);
1386+
assertThat(MolecularFormulaManipulator.getString(mamf, false, true),
1387+
is("[12]C6[79]Br3[81]Br3"));
1388+
}
1389+
1390+
// Iron has 4 stable isotopes, 54 @ 5.85%, 56 @ 91.57%, 57 @ 2.12%, and
1391+
// 58 @ 0.28%. Given 100 iron's we expected ~6 @ 54, ~92 @ 56 and 2 @ 57
1392+
@Test
1393+
public void getMostAbundantFe100() {
1394+
IMolecularFormula mf = new MolecularFormula();
1395+
mf.addIsotope(new Atom("Fe"), 100);
1396+
IMolecularFormula mamf = MolecularFormulaManipulator.getMostAbundant(mf);
1397+
assertThat(MolecularFormulaManipulator.getString(mamf, false, true),
1398+
is("[54]Fe6[56]Fe92[57]Fe2"));
1399+
}
13781400
}

0 commit comments

Comments
 (0)