forked from facebookresearch/faiss
-
Notifications
You must be signed in to change notification settings - Fork 1
/
OnDiskInvertedLists.h
127 lines (97 loc) · 3.59 KB
/
OnDiskInvertedLists.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_ON_DISK_INVERTED_LISTS_H
#define FAISS_ON_DISK_INVERTED_LISTS_H
#include <vector>
#include <list>
#include <faiss/IndexIVF.h>
namespace faiss {
// Forward declaration: this header only refers to LockLevels through the
// pointer member OnDiskInvertedLists::locks, so the full definition is not
// needed here.
struct LockLevels;
/** On-disk storage of inverted lists.
*
* The data is stored in a mmapped chunk of memory (base pointer ptr,
* size totsize). Each list (described by an object List) is a range of
* memory that contains:
*
* - uint8_t codes[capacity * code_size]
* - followed by idx_t ids[capacity]
*
* in each of the arrays, the size <= capacity first elements are
* used, the rest is not initialized.
*
* Addition and resize are supported by:
* - rounding up the capacity of the lists to a power of two
* - maintaining a list of empty slots, sorted by size.
* - adjusting the size of the mmapped block as needed.
*
* An OnDiskInvertedLists is compact if the size == capacity for all
* lists and there are no available slots.
*
* Addition to the invlists is slow. For incremental add it is better
* to use a default ArrayInvertedLists object and convert it to an
* OnDisk with merge_from.
*
* When it is known that a set of lists will be accessed, it is useful
* to call prefetch_lists, that launches a set of threads to read the
* lists in parallel.
*/
struct OnDiskInvertedLists : InvertedLists {
    /// Bookkeeping record for one inverted list inside the mapped buffer.
    struct List {
        size_t size;     ///< entries currently held by the list
        size_t capacity; ///< entries the list has room for
        size_t offset;   ///< where the list starts in the buffer, in bytes

        List();
    };

    /// One record per inverted list (nlist elements).
    std::vector<List> lists;

    /// An empty (unused) range of the mapped buffer.
    struct Slot {
        size_t offset;   ///< in bytes
        size_t capacity; ///< in bytes

        Slot(size_t offset, size_t capacity);
        Slot();
    };

    /// Empty slots; their number depends on whatever space remains.
    std::list<Slot> slots;

    // NOTE(review): presumably the path of the file backing the mapping --
    // confirm against the implementation file.
    std::string filename;
    size_t totsize; ///< size of the mmapped chunk
    uint8_t* ptr;   ///< base pointer of the mmapped chunk
    bool read_only; ///< true when the inverted lists are mapped read-only

    OnDiskInvertedLists(
            size_t nlist,
            size_t code_size,
            const char* filename);

    // ---- overrides of the InvertedLists interface ----

    size_t list_size(size_t list_no) const override;
    const uint8_t* get_codes(size_t list_no) const override;
    const idx_t* get_ids(size_t list_no) const override;

    size_t add_entries(
            size_t list_no,
            size_t n_entry,
            const idx_t* ids,
            const uint8_t* code) override;

    void update_entries(
            size_t list_no,
            size_t offset,
            size_t n_entry,
            const idx_t* ids,
            const uint8_t* code) override;

    void resize(size_t list_no, size_t new_size) override;

    /// Copies every inverted list of the n_il inputs into *this. The result
    /// is stored in compact form, i.e. no slots are allocated.
    size_t merge_from(
            const InvertedLists** ils,
            int n_il,
            bool verbose = false);

    /// Restricts the inverted lists to the range l0:l1; the mmapped region
    /// itself is left untouched.
    void crop_invlists(size_t l0, size_t l1);

    /// Launches a set of threads that read the given lists in parallel.
    void prefetch_lists(const idx_t* list_nos, int nlist) const override;

    virtual ~OnDiskInvertedLists();

    // ---- private (by convention only: a struct does not enforce it) ----

    LockLevels* locks;

    /// Encapsulates the threads that are busy prefetching.
    struct OngoingPrefetch;
    OngoingPrefetch* pf;
    // NOTE(review): presumably the number of prefetch threads -- confirm
    // against the implementation file.
    int prefetch_nthread;

    // Internal helpers; their exact contracts live in the implementation
    // file and are not restated here.
    void do_mmap();
    void update_totsize(size_t new_totsize);
    void resize_locked(size_t list_no, size_t new_size);
    size_t allocate_slot(size_t capacity);
    void free_slot(size_t offset, size_t capacity);

    /// Empty constructor, intended for the I/O functions.
    OnDiskInvertedLists();
};
} // namespace faiss
#endif