Skip to content
This repository was archived by the owner on Mar 19, 2024. It is now read-only.

Commit 256032b

Browse files
Celebio authored and facebook-github-bot committed
remove printing functions from fasttext class
Summary: This diff removes the printing capabilities from the fastText class and defines a new API.
- `predictLine` extracts predictions from exactly one line of the input stream.
- The deprecated `printLabelStats` is removed, as [the JS bindings don't use it](https://www.facebook.com/groups/1174547215919768/?multi_permalinks=2328051983902613&comment_id=2360179150689896).
- `ngramVectors` is now deprecated in favor of the newly added `getNgramVectors`. The `Vector` class remains copy-free, but move semantics have been added.
- `analogies` is now deprecated in favor of `getAnalogies`. When called, the fastText class lazily precomputes word vectors.
- `findNN` is now deprecated in favor of `getNN`. When called, the fastText class lazily precomputes word vectors.
- The `trainThread` and `printInfo` functions are now private.
- `supervised`, `cbow`, `skipgram`, `selectEmbeddings` and `precomputeWordVectors` are now deprecated and will be private in the future.
- `saveVectors`, `saveOutput` and `saveModel` without arguments are now deprecated in favor of their equivalents that take the filename as a string argument.

Reviewed By: EdouardGrave
Differential Revision: D13083799
fbshipit-source-id: f557ed7c141a90a6171045fe118ac16c195c824f
1 parent 4aee63d commit 256032b

9 files changed

Lines changed: 395 additions & 225 deletions

File tree

python/fastText/FastText.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -130,12 +130,16 @@ def check(entry):
130130

131131
if type(text) == list:
132132
text = [check(entry) for entry in text]
133-
all_probs, all_labels = self.f.multilinePredict(text, k, threshold)
134-
return all_labels, np.array(all_probs, copy=False)
133+
predictions = self.f.multilinePredict(text, k, threshold)
134+
dt = np.dtype([('probability', 'float64'), ('label', '<U32')])
135+
result_as_pair = np.array(predictions, dtype=dt)
136+
137+
return result_as_pair['label'].tolist(), result_as_pair['probability']
135138
else:
136139
text = check(text)
137-
pairs = self.f.predict(text, k, threshold)
138-
probs, labels = zip(*pairs)
140+
predictions = self.f.predict(text, k, threshold)
141+
probs, labels = zip(*predictions)
142+
139143
return labels, np.array(probs, copy=False)
140144

141145
def get_input_matrix(self):

python/fastText/pybind/fasttext_pybind.cc

Lines changed: 23 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -262,23 +262,30 @@ PYBIND11_MODULE(fasttext_pybind, m) {
262262
const std::string text,
263263
int32_t k,
264264
fasttext::real threshold) {
265-
std::vector<std::pair<fasttext::real, int32_t>> predictions;
266-
std::vector<std::pair<fasttext::real, std::string>> all_predictions;
267265
std::stringstream ioss(text);
268-
std::shared_ptr<const fasttext::Dictionary> d = m.getDictionary();
269-
std::vector<int32_t> words, labels;
270-
d->getLine(ioss, words, labels);
271-
m.predict(k, words, predictions, threshold);
272-
std::transform(
273-
predictions.begin(),
274-
predictions.end(),
275-
std::back_inserter(all_predictions),
276-
[&d](const std::pair<fasttext::real, int32_t>& prediction) {
277-
return std::pair<fasttext::real, std::string>(
278-
std::exp(prediction.first),
279-
d->getLabel(prediction.second));
280-
});
281-
return all_predictions;
266+
std::vector<std::pair<fasttext::real, std::string>> predictions;
267+
m.predictLine(ioss, predictions, k, threshold);
268+
269+
return predictions;
270+
})
271+
.def(
272+
"multilinePredict",
273+
// NOTE: text needs to end in a newline
274+
// to exactly mimic the behavior of the cli
275+
[](fasttext::FastText& m,
276+
const std::vector<std::string>& lines,
277+
int32_t k,
278+
fasttext::real threshold) {
279+
std::vector<std::vector<std::pair<fasttext::real, std::string>>>
280+
allPredictions;
281+
std::vector<std::pair<fasttext::real, std::string>> predictions;
282+
283+
for (const std::string& text : lines) {
284+
std::stringstream ioss(text);
285+
m.predictLine(ioss, predictions, k, threshold);
286+
allPredictions.push_back(predictions);
287+
}
288+
return allPredictions;
282289
})
283290
.def(
284291
"testLabel",
@@ -303,38 +310,6 @@ PYBIND11_MODULE(fasttext_pybind, m) {
303310

304311
return returnedValue;
305312
})
306-
.def(
307-
"multilinePredict",
308-
// NOTE: text needs to end in a newline
309-
// to exactly mimic the behavior of the cli
310-
[](fasttext::FastText& m,
311-
const std::vector<std::string>& lines,
312-
int32_t k,
313-
fasttext::real threshold) {
314-
std::pair<
315-
std::vector<std::vector<fasttext::real>>,
316-
std::vector<std::vector<std::string>>>
317-
all_predictions;
318-
std::vector<std::pair<fasttext::real, int32_t>> predictions;
319-
std::shared_ptr<const fasttext::Dictionary> d = m.getDictionary();
320-
std::vector<int32_t> words, labels;
321-
for (const std::string& text : lines) {
322-
std::stringstream ioss(text);
323-
predictions.clear();
324-
d->getLine(ioss, words, labels);
325-
m.predict(k, words, predictions, threshold);
326-
all_predictions.first.push_back(std::vector<fasttext::real>());
327-
all_predictions.second.push_back(std::vector<std::string>());
328-
for (auto& pair : predictions) {
329-
pair.first = std::exp(pair.first);
330-
all_predictions.first[all_predictions.first.size() - 1]
331-
.push_back(pair.first);
332-
all_predictions.second[all_predictions.second.size() - 1]
333-
.push_back(d->getLabel(pair.second));
334-
}
335-
}
336-
return all_predictions;
337-
})
338313
.def(
339314
"getWordId",
340315
[](fasttext::FastText& m, const std::string word) {

0 commit comments

Comments
 (0)