Skip to content

Commit 48e9487

Browse files
Address Dimitrios' comments 1
1 parent 36c2d48 commit 48e9487

1 file changed

Lines changed: 17 additions & 5 deletions

File tree

vectors_deep1b/_tools/parse.py

Lines changed: 17 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,21 @@
1+
#!/usr/bin/env python3
12
import sys
23
import struct
34
import json
4-
from tqdm import tqdm
5+
6+
try:
7+
from tqdm import tqdm
8+
iterate = lambda i: tqdm(range(i))
9+
except ModuleNotFoundError:
10+
print("Warning: [tqdm] package is not available and you won't be able to see progress.", file=sys.stderr)
11+
iterate = range
12+
13+
dims = 96
14+
num_vectors = 9990000
15+
516

617
def to_json(f):
7-
for i in tqdm(range(num_vectors)) :
18+
for i in iterate(num_vectors):
819
f.read(4) # in .fvecs format for each vector the first 4 bytes represent dim
920
vector = struct.unpack('f' * dims, f.read(dims * 4))
1021

@@ -13,8 +24,9 @@ def to_json(f):
1324
print(json.dumps(record, ensure_ascii=False))
1425

1526

16-
dims = 96
17-
num_vectors = 9990000
27+
if len(sys.argv) != 2:
28+
print(f"Error: No .fvecs file. Rerun using [{sys.argv[0]} /path/to/deep10M.fvecs].")
29+
sys.exit(1)
1830

1931
with open(sys.argv[1], 'rb') as f:
20-
to_json(f)
32+
to_json(f)

0 commit comments

Comments
 (0)