|
58 | 58 | */ |
59 | 59 | public class MolecularFormulaManipulator { |
60 | 60 |
|
| 61 | + public static final Comparator<IIsotope> NAT_ABUN_COMP = new Comparator<IIsotope>() { |
| 62 | + @Override |
| 63 | + public int compare(IIsotope o1, IIsotope o2) { |
| 64 | + return -Double.compare(o1.getNaturalAbundance(), |
| 65 | + o2.getNaturalAbundance()); |
| 66 | + } |
| 67 | + }; |
| 68 | + |
61 | 69 | /** |
62 | 70 | * Checks a set of Nodes for the occurrence of each isotopes |
63 | 71 | * instance in the molecular formula. In short number of atoms. |
@@ -1411,4 +1419,89 @@ else if (proton != null && |
1411 | 1419 |
|
1412 | 1420 | return true; |
1413 | 1421 | } |
| 1422 | + |
| 1423 | + /** |
| 1424 | + * Helper method for adding isotope distributions to a MF. The method adds |
| 1425 | + * a distribution of isotopes by splitting the set of isotopes in two, |
| 1426 | + * the one under consideration (specified by 'idx') and the remaining to be |
| 1427 | + * considered ('>idx'). The inflection point is calculate as 'k' |
| 1428 | + * &le 'count' isotopes added. If there are remaining isotopes the method |
| 1429 | + * calls it's self with 'idx+1' and 'count := k'. |
| 1430 | + * |
| 1431 | + * @param mf the molecular formula to update |
| 1432 | + * @param isotopes the isotopes, sorted most abundance to least |
| 1433 | + * @param idx which isotope we're currently considering |
| 1434 | + * @param count the number of isotopes remaining to select from |
| 1435 | + * @return the distribution is unique (or not) |
| 1436 | + */ |
| 1437 | + private static boolean addIsotopeDist(IMolecularFormula mf, |
| 1438 | + IIsotope[] isotopes, |
| 1439 | + int idx, int count) { |
| 1440 | + if (count == 0) |
| 1441 | + return true; |
| 1442 | + double frac = 100d; |
| 1443 | + for (int i = 0; i < idx; i++) |
| 1444 | + frac -= isotopes[i].getNaturalAbundance(); |
| 1445 | + double p = isotopes[idx].getNaturalAbundance() / frac; |
| 1446 | + |
| 1447 | + if (p >= 1.0) { |
| 1448 | + mf.addIsotope(isotopes[idx], count); |
| 1449 | + return true; |
| 1450 | + } |
| 1451 | + |
| 1452 | + double kMin = (count + 1) * (1 - p) - 1; |
| 1453 | + double kMax = (count + 1) * (1 - p); |
| 1454 | + if ((int) Math.ceil(kMin) == (int) Math.floor(kMax)) { |
| 1455 | + int k = (int) kMax; |
| 1456 | + mf.addIsotope(isotopes[idx], count - k); |
| 1457 | + // recurse with remaining |
| 1458 | + return addIsotopeDist(mf, isotopes, idx + 1, k); |
| 1459 | + } |
| 1460 | + return false; // multiple are most abundant |
| 1461 | + } |
| 1462 | + |
| 1463 | + /** |
| 1464 | + * Compute the most abundant MF. Given the MF C<sub>6</sub>Br<sub>6</sub> |
| 1465 | + * this function rapidly computes the most abundant MF as |
| 1466 | + * <sup>12</sup>C<sub>6</sub><sup>79</sup>Br<sub>3</sub><sup>81 |
| 1467 | + * </sup>Br<sub>3</sub>. |
| 1468 | + * |
| 1469 | + * @param mf a molecular formula with unspecified isotopes |
| 1470 | + * @return the most abundant MF, or null if it could not be computed |
| 1471 | + */ |
| 1472 | + public static IMolecularFormula getMostAbundant(IMolecularFormula mf) { |
| 1473 | + final Isotopes isofact; |
| 1474 | + try { |
| 1475 | + isofact = Isotopes.getInstance(); |
| 1476 | + } catch (IOException e) { |
| 1477 | + return null; |
| 1478 | + } |
| 1479 | + IMolecularFormula res = mf.getBuilder() |
| 1480 | + .newInstance(IMolecularFormula.class); |
| 1481 | + for (IIsotope iso : mf.isotopes()) { |
| 1482 | + int count = mf.getIsotopeCount(iso); |
| 1483 | + if (iso.getMassNumber() == null || iso.getMassNumber() == 0) { |
| 1484 | + IIsotope[] isotopes = isofact.getIsotopes(iso.getSymbol()); |
| 1485 | + Arrays.sort(isotopes, NAT_ABUN_COMP); |
| 1486 | + if (!addIsotopeDist(res, isotopes, 0, count)) |
| 1487 | + return null; |
| 1488 | + } else |
| 1489 | + res.addIsotope(iso, count); |
| 1490 | + } |
| 1491 | + return res; |
| 1492 | + } |
| 1493 | + |
| 1494 | + /** |
| 1495 | + * Compute the most abundant MF. Given the a molecule |
| 1496 | + * C<sub>6</sub>Br<sub>6</sub> this function rapidly computes the most |
| 1497 | + * abundant MF as |
| 1498 | + * <sup>12</sup>C<sub>6</sub><sup>79</sup>Br<sub>3</sub><sup>81 |
| 1499 | + * </sup>Br<sub>3</sub>. |
| 1500 | + * |
| 1501 | + * @param mol a molecule with unspecified isotopes |
| 1502 | + * @return the most abundant MF, or null if it could not be computed |
| 1503 | + */ |
| 1504 | + public static IMolecularFormula getMostAbundant(IAtomContainer mol) { |
| 1505 | + return getMostAbundant(getMolecularFormula(mol)); |
| 1506 | + } |
1414 | 1507 | } |
0 commit comments