Skip to content

Commit da7c328

Browse files
authored
Updated breakExtractor to handle nested parentheses
Added support in breakExtractor for formulas with nested parentheses, replacing the character-by-character state machine with a simpler regex-based solution.
1 parent ff4d361 commit da7c328

File tree

1 file changed

+19
-35
lines changed

1 file changed

+19
-35
lines changed

tool/formula/src/main/java/org/openscience/cdk/tools/manipulator/MolecularFormulaManipulator.java

Lines changed: 19 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1232,41 +1232,25 @@ public static String simplifyMolecularFormula(String formula) {
12321232
* @return Formula with the correction
12331233
*/
12341234
private static String breakExtractor(String formula) {
1235-
boolean started = false;
1236-
boolean finalBreak = false;
1237-
String recentformula = "";
1238-
String multiple = "";
1239-
String finalformula = "";
1240-
for (int f = 0; f < formula.length(); f++) {
1241-
char thisChar = formula.charAt(f);
1242-
if (!started) {
1243-
if (thisChar == '(') {
1244-
// start
1245-
started = true;
1246-
}else {
1247-
finalformula += thisChar;
1248-
}
1249-
}else {
1250-
if (thisChar == ')') {
1251-
// final
1252-
finalBreak = true;
1253-
} else if (!finalBreak) {
1254-
recentformula += thisChar;
1255-
} else if ( isDigit(thisChar) ){
1256-
multiple += thisChar;
1257-
} else {
1258-
finalformula += formula.substring(f, formula.length());
1259-
break;
1260-
}
1261-
}
1262-
}
1263-
finalformula += muliplier(recentformula, multiple.isEmpty() ? 1:Integer.valueOf(multiple));
1264-
1265-
if (finalformula.contains("("))
1266-
return breakExtractor(finalformula);
1267-
else
1268-
return finalformula;
1269-
}
1235+
Pattern pattern = Pattern.compile("(.*)\\(([^(]+?)\\)([0-9]*)(.*)");
1236+
1237+
while (formula.contains("(")) {
1238+
Matcher matcher = pattern.matcher(formula);
1239+
String newFormula = formula;
1240+
1241+
while ( matcher.find() ) {
1242+
String multiplierStr = matcher.group(3);
1243+
int multiplier = multiplierStr.isEmpty() ? 1:Integer.parseInt(multiplierStr);
1244+
newFormula = matcher.group(1) + muliplier(matcher.group(2), multiplier) + matcher.group(4);
1245+
}
1246+
1247+
if (newFormula == formula)
1248+
return formula;
1249+
formula = newFormula;
1250+
}
1251+
1252+
return formula;
1253+
}
12701254

12711255
/**
12721256
* A leading numeric value is used to show the quantity by which a formula is multiplied.

0 commit comments

Comments
 (0)