Skip to content

Commit 033d5f5

Browse files
committed
Speed-up getCladeCounts
1 parent 2c3054c commit 033d5f5

File tree

3 files changed

+29
-9
lines changed

3 files changed

+29
-9
lines changed

src/taxonomy/NcbiTaxonomy.cpp

Lines changed: 23 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -501,7 +501,23 @@ size_t NcbiTaxonomy::loadMerged(const std::string &mergedFile) {
501501
return count;
502502
}
503503

504-
std::unordered_map<TaxID, TaxonCounts> NcbiTaxonomy::getCladeCounts(const std::unordered_map<TaxID, unsigned int>& taxonCounts) const {
504+
std::unordered_map<TaxID, std::vector<TaxID>> NcbiTaxonomy::getParentToChildren() const {
505+
std::unordered_map<TaxID, std::vector<TaxID>> result;
506+
result.reserve(maxNodes);
507+
508+
// Build the adjacency (parent -> children)
509+
for (size_t i = 0; i < maxNodes; ++i) {
510+
const TaxonNode& tn = taxonNodes[i];
511+
if (tn.parentTaxId == tn.taxId) {
512+
continue;
513+
}
514+
result[tn.parentTaxId].push_back(tn.taxId);
515+
}
516+
517+
return result;
518+
}
519+
520+
std::unordered_map<TaxID, TaxonCounts> NcbiTaxonomy::getCladeCounts(const std::unordered_map<TaxID, unsigned int>& taxonCounts, const std::unordered_map<TaxID, std::vector<TaxID>>& parentToChildren) const {
505521
std::unordered_map<TaxID, TaxonCounts> cladeCounts;
506522

507523
for (std::unordered_map<TaxID, unsigned int>::const_iterator it = taxonCounts.begin(); it != taxonCounts.end(); ++it) {
@@ -516,11 +532,12 @@ std::unordered_map<TaxID, TaxonCounts> NcbiTaxonomy::getCladeCounts(const std::u
516532
}
517533
}
518534

519-
for (size_t i = 0; i < maxNodes; ++i) {
520-
TaxonNode& tn = taxonNodes[i];
521-
if (tn.parentTaxId != tn.taxId && cladeCounts.count(tn.taxId)) {
522-
std::unordered_map<TaxID, TaxonCounts>::iterator itp = cladeCounts.find(tn.parentTaxId);
523-
itp->second.children.push_back(tn.taxId);
535+
for (std::unordered_map<TaxID, TaxonCounts>::iterator it = cladeCounts.begin(); it != cladeCounts.end(); ++it) {
536+
TaxID parentTaxId = it->first;
537+
TaxonCounts& taxCounts = it->second;
538+
std::unordered_map<TaxID, std::vector<TaxID>>::const_iterator ptcIt = parentToChildren.find(parentTaxId);
539+
if (ptcIt != parentToChildren.end()) {
540+
taxCounts.children = ptcIt->second;
524541
}
525542
}
526543

src/taxonomy/NcbiTaxonomy.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -107,7 +107,8 @@ class NcbiTaxonomy {
107107
TaxonNode const* taxonNode(TaxID taxonId, bool fail = true) const;
108108
bool nodeExists(TaxID taxId) const;
109109

110-
std::unordered_map<TaxID, TaxonCounts> getCladeCounts(const std::unordered_map<TaxID, unsigned int>& taxonCounts) const;
110+
std::unordered_map<TaxID, std::vector<TaxID>> getParentToChildren() const;
111+
std::unordered_map<TaxID, TaxonCounts> getCladeCounts(const std::unordered_map<TaxID, unsigned int>& taxonCounts, const std::unordered_map<TaxID, std::vector<TaxID>>& parentToChildren) const;
111112

112113
WeightedTaxResult weightedMajorityLCA(const std::vector<WeightedTaxHit> &setTaxa, const float majorityCutoff);
113114

src/taxonomy/taxonomyreport.cpp

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -194,6 +194,8 @@ int taxonomyreport(int argc, const char **argv, const Command &command) {
194194
DBWriter writer(par.db3.c_str(), par.db3Index.c_str(), localThreads, false, mode);
195195
writer.open();
196196

197+
std::unordered_map<TaxID, std::vector<TaxID>> parentToChildren = taxDB->getParentToChildren();
198+
197199
std::unordered_map<TaxID, unsigned int> taxCounts;
198200
Debug::Progress progress(reader.getSize());
199201
#pragma omp parallel num_threads(localThreads)
@@ -233,7 +235,7 @@ int taxonomyreport(int argc, const char **argv, const Command &command) {
233235
data = Util::skipLine(data);
234236
}
235237
if (par.reportMode == 2) {
236-
std::unordered_map<TaxID, TaxonCounts> cladeCounts = taxDB->getCladeCounts(localTaxCounts);
238+
std::unordered_map<TaxID, TaxonCounts> cladeCounts = taxDB->getCladeCounts(localTaxCounts, parentToChildren);
237239
writer.writeStart(thread_idx);
238240
taxReport(writer, thread_idx, *taxDB, cladeCounts, entryCount);
239241
writer.writeEnd(reader.getDbKey(i), thread_idx);
@@ -258,7 +260,7 @@ int taxonomyreport(int argc, const char **argv, const Command &command) {
258260
reader.close();
259261

260262
Debug(Debug::INFO) << "Calculating clade counts ... ";
261-
std::unordered_map<TaxID, TaxonCounts> cladeCounts = taxDB->getCladeCounts(taxCounts);
263+
std::unordered_map<TaxID, TaxonCounts> cladeCounts = taxDB->getCladeCounts(taxCounts, parentToChildren);
262264
Debug(Debug::INFO) << " Done\n";
263265
if (par.reportMode == 0) {
264266
writer.writeStart(0);

0 commit comments

Comments
 (0)