/**
@file file_contexts.c
@brief Cache sent fs events
@details Copyright (c) 2023 Acronis International GmbH
@author Bruce Wang (bruce.wang@acronis.com)
@since $Id: $
*/
#include "compat.h"
#include "debug.h"
#include "file_contexts.h"
#include "file_contexts_priv.h"
#include "memory.h"
#include <linux/jiffies.h>
/* Fallback definition, used only when list_first_entry_or_null is not already
 * defined: yields NULL for an empty list, otherwise the first entry. */
#ifndef list_first_entry_or_null
#define list_first_entry_or_null(ptr, type, member) (list_empty(ptr) ? NULL : list_first_entry(ptr, type, member))
#endif
/* Per-process node that carries nothing beyond the generic tree node.
 * NOTE(review): presumably the object type behind the
 * file_context_process_node cache declared below - confirm. */
typedef struct
{
file_context_tree_node_t node;
} file_context_process_node_t;
/* Single manager instance: holds the per-transport table slots, the
 * close-modified table and the writer_lock that serialises slot updates. */
static file_context_manager_t global_fs_event_cache_manager;
/* Struct caches for every node type allocated by this file; created in
 * file_contexts_init() and destroyed in file_contexts_deinit(). */
static KMEM_STRUCT_CACHE_DECLARE(file_context_open_process_node);
static KMEM_STRUCT_CACHE_DECLARE(file_context_open_file_node);
static KMEM_STRUCT_CACHE_DECLARE(file_context_rw_node);
static KMEM_STRUCT_CACHE_DECLARE(file_context_process_node);
static KMEM_STRUCT_CACHE_DECLARE(file_context_file_modify_node);
static KMEM_STRUCT_CACHE_DECLARE(interval_node);
/**
 * Reset the global manager and create every struct cache used by this file.
 *
 * The caches are created one after another; the first failure is logged,
 * everything created so far is released through
 * file_contexts_init_fail_free() and -ENOMEM is returned.
 *
 * @return 0 on success, -ENOMEM if any cache could not be created.
 */
int file_contexts_init(void)
{
    int slot;

    spin_lock_init(&global_fs_event_cache_manager.writer_lock);
    for (slot = 0; slot < MAX_TRANSPORT_SIZE; slot++)
        global_fs_event_cache_manager.tables_for_transport[slot] = NULL;
    global_fs_event_cache_manager.close_modified_table = NULL;

    /* Start from a known state so the failure path can deinit every cache. */
    KMEM_STRUCT_CACHE_NAME(file_context_open_process_node) = NULL;
    KMEM_STRUCT_CACHE_NAME(file_context_open_file_node) = NULL;
    KMEM_STRUCT_CACHE_NAME(file_context_rw_node) = NULL;
    KMEM_STRUCT_CACHE_NAME(file_context_process_node) = NULL;
    KMEM_STRUCT_CACHE_NAME(file_context_file_modify_node) = NULL;
    KMEM_STRUCT_CACHE_NAME(interval_node) = NULL;

    /* The else-if chain stops at the first cache that fails to initialise. */
    if (!KMEM_STRUCT_CACHE_INIT(file_context_open_process_node, 0, NULL)) {
        EPRINTF("Failed to create file_context_open_process_node cache");
    } else if (!KMEM_STRUCT_CACHE_INIT(file_context_open_file_node, 0, NULL)) {
        EPRINTF("Failed to create file_context_open_file_node_t cache");
    } else if (!KMEM_STRUCT_CACHE_INIT(file_context_rw_node, 0, NULL)) {
        EPRINTF("Failed to create file_context_rw_node cache");
    } else if (!KMEM_STRUCT_CACHE_INIT(file_context_process_node, 0, NULL)) {
        EPRINTF("Failed to create file_context_process_node cache");
    } else if (!KMEM_STRUCT_CACHE_INIT(file_context_file_modify_node, 0, NULL)) {
        EPRINTF("Failed to create file_context_file_modify_node cache");
    } else if (!KMEM_STRUCT_CACHE_INIT(interval_node, 0, NULL)) {
        EPRINTF("Failed to create interval_node cache");
    } else {
        return 0;
    }

    file_contexts_init_fail_free();
    return -ENOMEM;
}
/**
 * Release every struct cache owned by this file.
 *
 * Called by file_contexts_init() on its failure path, so it runs the deinit
 * macro on all six caches regardless of which of them were created.
 */
void file_contexts_init_fail_free(void)
{
    /* Node caches for the open-event tables. */
    KMEM_STRUCT_CACHE_DEINIT(file_context_open_process_node);
    KMEM_STRUCT_CACHE_DEINIT(file_context_open_file_node);

    /* Remaining node caches. */
    KMEM_STRUCT_CACHE_DEINIT(file_context_rw_node);
    KMEM_STRUCT_CACHE_DEINIT(file_context_process_node);
    KMEM_STRUCT_CACHE_DEINIT(file_context_file_modify_node);
    KMEM_STRUCT_CACHE_DEINIT(interval_node);
}
static inline void put_file_context_big_table(file_context_big_table_t *entry);

/*
 * Drop the entry's reference on each of its open/read/write tables and
 * unpublish the pointers.
 * Must be called with the manager's writer_lock held.
 */
static inline void put_file_context_entry(file_context_tables_t *entry)
{
    file_context_big_table_t *table;

    table = entry->open_table;
    if (table) {
        put_file_context_big_table(table);
        rcu_assign_pointer(entry->open_table, NULL);
    }

    table = entry->read_table;
    if (table) {
        put_file_context_big_table(table);
        rcu_assign_pointer(entry->read_table, NULL);
    }

    table = entry->write_table;
    if (table) {
        put_file_context_big_table(table);
        rcu_assign_pointer(entry->write_table, NULL);
    }
}
void file_contexts_deinit(void)
{
int i;
file_context_tables_t* tables_to_free[MAX_TRANSPORT_SIZE];
for (i = 0; i < MAX_TRANSPORT_SIZE; i++)
{
tables_to_free[i] = NULL;
}
spin_lock(&global_fs_event_cache_manager.writer_lock);
for (i = 0; i < MAX_TRANSPORT_SIZE; i++)
{
if (global_fs_event_cache_manager.tables_for_transport[i])
{
tables_to_free[i] = global_fs_event_cache_manager.tables_for_transport[i];
put_file_context_entry(global_fs_event_cache_manager.tables_for_transport[i]);
rcu_assign_pointer(global_fs_event_cache_manager.tables_for_transport[i], NULL);
}
}
if (global_fs_event_cache_manager.close_modified_table)
{
put_file_context_big_table(global_fs_event_cache_manager.close_modified_table);
rcu_assign_pointer(global_fs_event_cache_manager.close_modified_table, NULL);
}
spin_unlock(&global_fs_event_cache_manager.writer_lock);
synchronize_rcu();
for (i = 0; i < MAX_TRANSPORT_SIZE; i++)
{
if (tables_to_free[i])
mem_free(tables_to_free[i]);
}
// For 'put_file_context_big_table' synchronization
rcu_barrier();
KMEM_STRUCT_CACHE_DEINIT(file_context_open_process_node);
KMEM_STRUCT_CACHE_DEINIT(file_context_open_file_node);
KMEM_STRUCT_CACHE_DEINIT(file_context_rw_node);
KMEM_STRUCT_CACHE_DEINIT(file_context_process_node);
KMEM_STRUCT_CACHE_DEINIT(file_context_file_modify_node);
KMEM_STRUCT_CACHE_DEINIT(interval_node);
}
/* Unconditionally take one more reference on common_node. */
static inline void get_common_node(file_context_common_node_t *common_node)
{
    atomic_t *refs = &common_node->ref_count;

    atomic_inc(refs);
}
/*
 * Try to take a reference on common_node.
 * Returns false when the count has already reached zero, in which case no
 * reference is taken.
 */
static inline bool get_common_node_rcu(file_context_common_node_t *common_node)
{
    bool acquired = atomic_inc_not_zero(&common_node->ref_count);

    return acquired;
}
/* Drop one reference; returns true when this was the last one. */
static inline bool put_test_common_node(file_context_common_node_t *common_node)
{
    bool was_last = atomic_dec_and_test(&common_node->ref_count);

    return was_last;
}
/* Take one more reference on a hash-table node. */
static inline void get_ht_node(file_context_ht_node_t *ht_node)
{
    get_common_node(&ht_node->common);
}
/*
 * Try to take a reference on a hash-table node; returns false if its count
 * is already zero.
 */
static inline bool get_ht_node_rcu(file_context_ht_node_t *ht_node)
{
    file_context_common_node_t *common = &ht_node->common;

    return get_common_node_rcu(common);
}
/*
 * Drop one reference on a hash-table node. When the last reference goes
 * away the node's free_func is queued through call_rcu().
 */
static inline void put_ht_node(file_context_ht_node_t *ht_node)
{
    if (!put_test_common_node(&ht_node->common))
        return;

    call_rcu(&ht_node->rcu, ht_node->free_func);
}
/* Take one more reference on a tree node. */
static inline void get_tree_node(file_context_tree_node_t *tree_node)
{
    get_common_node(&tree_node->common);
}
/*
 * Drop one reference on a tree node. The node's free_func is invoked
 * directly (no RCU deferral) when the last reference is dropped.
 */
static inline void put_tree_node(file_context_tree_node_t *tree_node)
{
    if (!put_test_common_node(&tree_node->common))
        return;

    tree_node->free_func(tree_node);
}
/*
 * Unlink common_node from its LRU list and record that it is no longer on
 * one. The caller must hold the lock protecting that list.
 */
static inline void remove_common_node_from_lru(file_context_common_node_t *common_node)
{
    struct list_head *lru_link = &common_node->lru_list_node;

    list_del(lru_link);
    common_node->lru_list_node_inserted = false;
}
/*
 * Remove ht_node from both the LRU list and the hash table of common_table,
 * account for it in the table size and drop one reference on the node.
 * The caller must hold the table lock.
 */
static inline void remove_and_put_ht_node(file_context_ht_node_t *ht_node, file_context_common_table_t *common_table)
{
    remove_common_node_from_lru(&ht_node->common);
    hash_del_rcu(&ht_node->node);
    common_table->size--;

    put_ht_node(ht_node);
}
/*
 * Unlink tree_node from the LRU list and the rb-tree and decrement the tree
 * size. The caller must hold the tree lock and must put the node itself
 * after releasing that lock.
 */
static inline void remove_tree_node(file_context_tree_t* tree, file_context_tree_node_t *tree_node)
{
    remove_common_node_from_lru(&tree_node->common);
    rb_erase(&tree_node->node, &tree->tree);
    tree->size--;
}
/*
 * Remove and put every node still on the table's LRU list.
 * No lock is taken here.
 */
static void clear_common_table_nolock(file_context_common_table_t *common_table)
{
    struct list_head *lru = &common_table->lru_list;

    while (!list_empty(lru)) {
        file_context_ht_node_t *oldest = list_first_entry(lru, file_context_ht_node_t, common.lru_list_node);

        remove_and_put_ht_node(oldest, common_table);
    }
}
/*
 * Remove every node still on the tree's LRU list from the tree and put it.
 * No lock is taken here.
 */
static void clear_tree_nolock(file_context_tree_t* tree)
{
    struct list_head *lru = &tree->lru_list;

    while (!list_empty(lru)) {
        file_context_tree_node_t *oldest = list_first_entry(lru, file_context_tree_node_t, common.lru_list_node);

        remove_tree_node(tree, oldest);
        put_tree_node(oldest);
    }
}
/*
 * RCU callback that releases a big table: empties whatever is still in its
 * common table, frees the table memory and decrements the
 * total_file_contexts_tables metric.
 */
static inline void deferred_free_big_table(struct rcu_head *head)
{
    file_context_big_table_t *big_table = container_of(head, file_context_big_table_t, rcu);

    // perhaps this is excessive, but it is better to be safe
    clear_common_table_nolock(&big_table->common_table);
#ifdef KERNEL_MOCK
    BUG_ON(big_table->common_table.size != 0);
#endif

    vmem_free(big_table);
    atomic64_sub(1, &g_memory_metrics->total_file_contexts_tables);
}
/*
 * Drop one reference on a big table; the last put queues
 * deferred_free_big_table() through call_rcu().
 */
static inline void put_file_context_big_table(file_context_big_table_t *table)
{
    if (!atomic_dec_and_test(&table->ref_count))
        return;

    call_rcu(&table->rcu, deferred_free_big_table);
}
/*
 * Pick the open/read/write table of entry according to type and try to take
 * a reference on it.
 *
 * Returns the referenced table, or NULL when the slot is empty, the type is
 * not one of the three handled values, or the table's reference count has
 * already dropped to zero. The table pointers are read with
 * rcu_dereference().
 */
static file_context_big_table_t *get_file_context_big_table_impl(file_context_tables_t *entry, file_context_table_type_t type)
{
    file_context_big_table_t *table = NULL;

    switch (type)
    {
    case FILE_CONTEXT_OPEN_TABLE:
        table = rcu_dereference(entry->open_table);
        break;
    case FILE_CONTEXT_READ_TABLE:
        table = rcu_dereference(entry->read_table);
        break;
    case FILE_CONTEXT_WRITE_TABLE:
        table = rcu_dereference(entry->write_table);
        break;
    }

    if (!table)
        return NULL;

    return atomic_inc_not_zero(&table->ref_count) ? table : NULL;
}
/*
 * Look up the table of the given type for transport_id and return it with a
 * reference held, or NULL.
 *
 * NULL is returned when the transport index is out of range, the slot is
 * empty, the slot belongs to a different transport id (logged), or the
 * table could not be referenced.
 */
static file_context_big_table_t *get_file_context_big_table(transport_id_t transport_id, file_context_table_type_t type)
{
    file_context_big_table_t *table = NULL;
    file_context_tables_t *slot_entry;
    int idx = transport_id_index(transport_id);

    if (idx < 0 || idx >= MAX_TRANSPORT_SIZE)
        return NULL;

    rcu_read_lock();
    slot_entry = rcu_dereference(global_fs_event_cache_manager.tables_for_transport[idx]);
    if (slot_entry && slot_entry->transport_id == transport_id) {
        table = get_file_context_big_table_impl(slot_entry, type);
    } else if (slot_entry) {
        DPRINTF("Transport id mismatch: %ld != %ld", slot_entry->transport_id, transport_id);
    }
    rcu_read_unlock();

    return table;
}
/*
 * Return the global close-modified table with a reference held, or NULL if
 * it is not set or its reference count already reached zero.
 */
static file_context_big_table_t *get_file_context_close_modified_table(void)
{
    file_context_big_table_t *table;

    rcu_read_lock();
    table = rcu_dereference(global_fs_event_cache_manager.close_modified_table);
    if (table && !atomic_inc_not_zero(&table->ref_count))
        table = NULL;
    rcu_read_unlock();

    return table;
}
/*
 * Same as get_file_context_big_table() but addressed by slot index; the
 * transport id stored in the slot is NOT verified.
 * Returns the referenced table or NULL.
 */
static file_context_big_table_t *get_file_context_big_table_by_idx(int idx, file_context_table_type_t type)
{
    file_context_big_table_t *table = NULL;
    file_context_tables_t *slot_entry;

    if (idx < 0 || idx >= MAX_TRANSPORT_SIZE)
        return NULL;

    rcu_read_lock();
    slot_entry = rcu_dereference(global_fs_event_cache_manager.tables_for_transport[idx]);
    if (slot_entry)
        table = get_file_context_big_table_impl(slot_entry, type);
    rcu_read_unlock();

    return table;
}
/**
 * Unpublish and free the table entry registered for transport `id`.
 *
 * Under writer_lock the slot is cleared only if it currently belongs to
 * `id`; a mismatch is logged and the slot is left untouched. The entry is
 * freed after an RCU grace period so readers that already picked up the
 * pointer stay safe.
 *
 * The slot index is validated first: the table getters in this file treat
 * transport_id_index() as able to return an out-of-range value, and using
 * such a value here would index tables_for_transport[] out of bounds.
 *
 * @param id transport id whose entry is released.
 */
void release_file_context_entry(transport_id_t id)
{
    file_context_tables_t* tables = NULL;
    int idx = transport_id_index(id);

    if (idx < 0 || idx >= MAX_TRANSPORT_SIZE) {
        WPRINTF("release_file_context_entry: invalid transport index %d", idx);
        return;
    }

    spin_lock(&global_fs_event_cache_manager.writer_lock);
    if (global_fs_event_cache_manager.tables_for_transport[idx])
    {
        if (global_fs_event_cache_manager.tables_for_transport[idx]->transport_id == id) {
            tables = global_fs_event_cache_manager.tables_for_transport[idx];
            put_file_context_entry(tables);
            rcu_assign_pointer(global_fs_event_cache_manager.tables_for_transport[idx], NULL);
        } else {
            WPRINTF("release_file_context_entry: %d, id mismatch: %llu, %llu", idx, global_fs_event_cache_manager.tables_for_transport[idx]->transport_id, id);
        }
    }
    spin_unlock(&global_fs_event_cache_manager.writer_lock);

    if (tables) {
        /* Wait for readers that may still dereference the old slot pointer. */
        synchronize_rcu();
        mem_free(tables);
    }
    IPRINTF("release_file_context_entry: %llu", id);
}
/*
 * Initialise an empty common table.
 *
 * hashbits       - log2 of the number of hash buckets to initialise
 * max_size       - size limit stored in the table
 * clean_count    - clean count stored in the table
 * expire_time_ms - expiry in milliseconds; stored converted to jiffies
 */
static void init_file_context_common_table(file_context_common_table_t *table,
                                           uint8_t hashbits,
                                           unsigned int max_size,
                                           unsigned short clean_count,
                                           unsigned long expire_time_ms)
{
    /* Synchronisation and bookkeeping containers. */
    INIT_LIST_HEAD(&table->lru_list);
    spin_lock_init(&table->spinlock);

    /* Configuration. */
    table->hashbits = hashbits;
    table->max_size = max_size;
    table->clean_count = clean_count;
    table->expire_time_jiffies = msecs_to_jiffies(expire_time_ms);

    /* Contents: no entries, all buckets empty. */
    table->size = 0;
    __hash_init(table->hashtable, 1 << hashbits);
}
/*
 * Initialise an empty tree with the given size limit and clean count.
 */
static void init_file_context_tree(file_context_tree_t *tree, unsigned int max_size, unsigned short clean_count)
{
    spin_lock_init(&tree->spinlock);
    INIT_LIST_HEAD(&tree->lru_list);
    tree->tree = RB_ROOT;

    tree->size = 0;
    tree->max_size = max_size;
    tree->clean_count = clean_count;
}
/*
 * Allocate a big table followed by its hash buckets and initialise it.
 *
 * The table starts with one reference and the total_file_contexts_tables
 * metric is incremented.
 * Returns NULL if the allocation fails.
 */
static file_context_big_table_t* init_big_table(unsigned long expire_time_ms)
{
    file_context_big_table_t *big_table;

    big_table = vmem_alloc(sizeof(file_context_big_table_t) + sizeof(struct hlist_head) * (1 << FILE_CONTEXT_BIG_TABLE_SIZE_BITS));
    if (big_table) {
        atomic64_add(1, &g_memory_metrics->total_file_contexts_tables);
        atomic_set(&big_table->ref_count, 1);
        init_file_context_common_table(&big_table->common_table,
                                       FILE_CONTEXT_BIG_TABLE_SIZE_BITS,
                                       FILE_CONTEXT_BIG_TABLE_SIZE,
                                       FILE_CONTEXT_BIG_TABLE_LRU_CLEAN_SIZE,
                                       expire_time_ms);
    }

    return big_table;
}
/*
 * Allocate a zeroed per-transport entry tagged with transport id `id`.
 * Returns NULL if the allocation fails.
 */
static file_context_tables_t* init_file_context_entry(transport_id_t id)
{
    file_context_tables_t *fresh = mem_alloc(sizeof(file_context_tables_t));

    if (fresh) {
        *fresh = (file_context_tables_t){0};
        fresh->transport_id = id;
    }

    return fresh;
}
/**
 * Register an empty table entry for transport `id`.
 *
 * The slot index is validated before anything is allocated: the table
 * getters in this file treat transport_id_index() as able to return an
 * out-of-range value, and using such a value here would index
 * tables_for_transport[] out of bounds.
 *
 * @param id transport id to register.
 * @return 0 on success,
 *         -EINVAL if the id maps to an invalid slot index,
 *         -ENOMEM if the entry cannot be allocated,
 *         -EEXIST if the slot is already occupied.
 */
int acquire_file_context_entry(transport_id_t id)
{
    int err = 0;
    int idx = transport_id_index(id);
    file_context_tables_t *entry;

    if (idx < 0 || idx >= MAX_TRANSPORT_SIZE) {
        WPRINTF("acquire_file_context_entry: invalid transport index %d", idx);
        return -EINVAL;
    }

    /* Allocate outside the spinlock; the entry is published or discarded below. */
    entry = init_file_context_entry(id);
    if (!entry) {
        return -ENOMEM;
    }

    spin_lock(&global_fs_event_cache_manager.writer_lock);
    if (global_fs_event_cache_manager.tables_for_transport[idx])
    {
        WPRINTF("acquire_file_context_entry: %d, already exists", idx);
        err = -EEXIST;
    } else {
        rcu_assign_pointer(global_fs_event_cache_manager.tables_for_transport[idx], entry);
        entry = NULL; /* ownership moved to the manager */
    }
    spin_unlock(&global_fs_event_cache_manager.writer_lock);

    if (entry) {
        mem_free(entry);
    }
    return err;
}
/* Callback that removes one node from its container; ctx is caller-defined. */
typedef void (*evict_fn_t)(void* ctx, file_context_common_node_t*);

/*
 * Trim an LRU list, evicting from the head and at most clean_count nodes in
 * total across both phases.
 *
 * Phase 1: if size exceeds max_size, evict nodes from the head.
 * Phase 2: with whatever budget is left, evict head nodes whose
 *          last_access_time + expire_time_jiffies has passed.
 *
 * The expiry test uses time_before() so that it stays correct when the
 * jiffies counter wraps; a plain '<' on the sum would evict live entries
 * (or keep expired ones) around the wrap point.
 *
 * Callers in this file invoke it with the container lock held.
 */
static inline void check_lru(struct list_head* lru_list
                             , unsigned int size
                             , unsigned int max_size
                             , unsigned short clean_count
                             , unsigned long expire_time_jiffies
                             , evict_fn_t evict, void* ctx)
{
    unsigned long now;

    // Remove nodes if LRU list is too large
    if (size > max_size) {
        while (clean_count) {
            file_context_common_node_t *node = list_first_entry_or_null(lru_list, file_context_common_node_t, lru_list_node);
            if (!node)
                break;
            evict(ctx, node);
            clean_count--;
        }
    }

    // Clean expired nodes
    now = jiffies;
    while (clean_count) {
        unsigned long expires_at;
        file_context_common_node_t *node = list_first_entry_or_null(lru_list, file_context_common_node_t, lru_list_node);
        if (!node)
            break;
        expires_at = node->last_access_time + expire_time_jiffies;
        /* The head of the list has not expired yet, so stop here. */
        if (time_before(now, expires_at))
            break;
        evict(ctx, node);
        clean_count--;
    }
}
/*
 * evict_fn_t adapter for hash tables: ctx is the common table and node is
 * the common part of the hash-table node to remove and put.
 */
static void lru_evict_ht(void* ctx, file_context_common_node_t* node)
{
    file_context_common_table_t *owner = (file_context_common_table_t *)ctx;

    remove_and_put_ht_node(container_of(node, file_context_ht_node_t, common), owner);
}
/*
 * Run the LRU size/expiry trimming on a common table using the table's own
 * limits. The caller must hold the table lock.
 */
static inline void check_common_table_lru(file_context_common_table_t *table)
{
    check_lru(&table->lru_list,
              table->size,
              table->max_size,
              table->clean_count,
              table->expire_time_jiffies,
              lru_evict_ht,
              table);
}
/*
 * Insert ht_node into common_table under `key`.
 *
 * The node is appended to the LRU tail. If a node with the same key is
 * already hashed, it is replaced in place (the new node wins), taken off
 * the LRU list and put; the table size does not change. Otherwise the node
 * is added to its bucket, the size grows by one and the LRU trimming runs.
 *
 * The table spinlock is taken internally.
 */
static void insert_ht_node(file_context_common_table_t *common_table,
                           file_context_ht_node_t *ht_node,
                           uint64_t key)
{
    file_context_ht_node_t *search_node;
    file_context_common_node_t *common_node = &ht_node->common;
    uint8_t hashbits = common_table->hashbits;
    bool inserted = false;

    common_node->key = key;

    /* RCU WRITER */
    spin_lock(&common_table->spinlock);
    list_add_tail(&common_node->lru_list_node, &common_table->lru_list);
    common_node->lru_list_node_inserted = true;

    // iterate through the hashtable to find the same key, if it exists, prefer the new node
    hash_for_each_possible_with_hashbits(common_table->hashtable, search_node, node, key, hashbits)
    {
        if (common_node->key == search_node->common.key)
        {
            // TODO: it is better to ensure that file_key does not match but this approach is good enough
            remove_common_node_from_lru(&search_node->common);
            hlist_replace_rcu(&search_node->node, &ht_node->node);
            /*
             * hlist_replace_rcu() leaves the old node's pprev poisoned rather
             * than NULL, so hlist_unhashed() would still report it as hashed.
             * remove_common_node() relies on that check and would otherwise
             * unlink the stale node a second time. Only pprev is cleared:
             * RCU readers may still follow the old node's ->next.
             */
            WRITE_ONCE(search_node->node.pprev, NULL);
            put_ht_node(search_node);
            inserted = true;
            break;
        }
    }

    if (!inserted)
    {
        hash_add_rcu_hashbits(common_table->hashtable, &ht_node->node, key, hashbits);
        common_table->size += 1;
        check_common_table_lru(common_table);
    }
    spin_unlock(&common_table->spinlock);
    /* RCU WRITER */
}
/* Context handed to lru_evict_tree_node() through check_lru(). */
struct tree_evict_context {
/* Tree the evicted node is removed from. */
file_context_tree_t* tree;
/* List collecting evicted nodes so they can be put after the tree lock is released. */
struct list_head* to_free_list;
};
/*
 * evict_fn_t adapter for trees: unlinks the node from the tree named in the
 * context and queues it on the context's to_free_list. The node is not put
 * here.
 */
static void lru_evict_tree_node(void* ctx, file_context_common_node_t* node)
{
    struct tree_evict_context *evict_ctx = ctx;
    file_context_tree_node_t *victim = container_of(node, file_context_tree_node_t, common);

    remove_tree_node(evict_ctx->tree, victim);
    list_add_tail(&victim->free_node, evict_ctx->to_free_list);
}
/*
 * Run the LRU size/expiry trimming on a tree. Evicted nodes are unlinked and
 * collected on to_free_list; the caller puts them after dropping the tree
 * lock. The caller must hold the tree lock.
 *
 * check_lru() expects the expiry interval in jiffies, so the millisecond
 * constant is converted here. Passing it raw makes the effective expiry
 * depend on HZ.
 */
static inline void check_tree_lru(file_context_tree_t* tree, struct list_head* to_free_list)
{
    struct tree_evict_context context = {tree, to_free_list};

    check_lru(&tree->lru_list,
              tree->size,
              tree->max_size,
              tree->clean_count,
              msecs_to_jiffies(FILE_CONTEXT_TREE_EXPIRE_TIME_MS),
              lru_evict_tree_node,
              &context);
}
/*
 * Insert tree_node into the rb-tree under `key`.
 *
 * The node is appended to the LRU tail. If a node with the same key already
 * exists it is replaced in place (the new node wins) and the old node is
 * put; the tree size does not change. Otherwise the node is linked in, the
 * size grows by one and the LRU trimming runs.
 *
 * The tree spinlock is taken internally and released exactly once. Every
 * node that has to be put (a replaced duplicate or LRU victims) is parked
 * on a local list and put only after the lock is dropped. The previous
 * code unlocked inside the duplicate branch and then unlocked again on the
 * common exit path.
 */
static void insert_tree_node(file_context_tree_t* tree,
                             file_context_tree_node_t* tree_node,
                             uint64_t key)
{
    struct rb_node **link = &(tree->tree.rb_node);
    struct rb_node *parent = NULL;
    file_context_common_node_t *common_node = &tree_node->common;
    bool inserted = false;
    LIST_HEAD(to_free_list);

    common_node->key = key;

    spin_lock(&tree->spinlock);
    list_add_tail(&common_node->lru_list_node, &tree->lru_list);
    common_node->lru_list_node_inserted = true;

    while (*link)
    {
        file_context_tree_node_t *curr;

        parent = *link;
        curr = container_of(parent, file_context_tree_node_t, node);
        if (key < curr->common.key) {
            link = &parent->rb_left;
        } else if (key > curr->common.key) {
            link = &parent->rb_right;
        } else {
            // TODO: is this a good approach? maybe keep the entry that was already there?
            remove_common_node_from_lru(&curr->common);
            rb_replace_node(&curr->node, &tree_node->node, &tree->tree);
            RB_CLEAR_NODE(&curr->node);
            /* Put the replaced node below, after the lock is released. */
            list_add_tail(&curr->free_node, &to_free_list);
            inserted = true;
            break;
        }
    }

    if (!inserted)
    {
        rb_link_node(&tree_node->node, parent, link);
        rb_insert_color(&tree_node->node, &tree->tree);
        tree->size += 1;
        check_tree_lru(tree, &to_free_list);
    }
    spin_unlock(&tree->spinlock);

    while (!list_empty(&to_free_list))
    {
        file_context_tree_node_t *to_free_node = list_first_entry(&to_free_list, file_context_tree_node_t, free_node);

        list_del(&to_free_node->free_node);
        put_tree_node(to_free_node);
    }
}
/* Give a fresh node one reference and stamp it with the current jiffies. */
static void init_common_node(file_context_common_node_t *node)
{
    atomic_set(&node->ref_count, 1);
    node->last_access_time = jiffies;
}
/*
 * Initialise the common part of a hash-table node and remember the RCU
 * callback that frees it.
 */
static void init_ht_node(file_context_ht_node_t *node, file_contexts_rcu_free_func_t free_func)
{
    init_common_node(&node->common);
    node->free_func = free_func;
}
/*
 * Initialise the common part of a tree node and remember the function that
 * frees it.
 */
static void init_tree_node(file_context_tree_node_t* node, file_contexts_tree_free_func_t free_func)
{
    init_common_node(&node->common);
    node->free_func = free_func;
}
/*
 * Walk the bucket selected by key and return the first node whose key
 * matches, or NULL. Non-RCU iteration.
 */
static inline file_context_ht_node_t *find_hash_node(struct hlist_head *head, uint64_t key, uint8_t hashbits)
{
    file_context_ht_node_t *cursor = NULL;

    hash_for_each_possible_with_hashbits(head, cursor, node, key, hashbits)
    {
        if (cursor->common.key != key)
            continue;
        return cursor;
    }

    return NULL;
}
/*
 * RCU-iteration variant of find_hash_node(): return the first node in the
 * bucket selected by key whose key matches, or NULL.
 */
static inline file_context_ht_node_t *find_hash_node_rcu(struct hlist_head *head, uint64_t key, uint8_t hashbits)
{
    file_context_ht_node_t *cursor = NULL;

    hash_for_each_possible_rcu_with_hashbits(head, cursor, node, key, hashbits)
    {
        if (cursor->common.key != key)
            continue;
        return cursor;
    }

    return NULL;
}
/*
 * Find the node stored under key and return its common part with a
 * reference held, or NULL if there is none or its count already hit zero.
 *
 * On a hit the node's last_access_time is refreshed and, if it is still on
 * the LRU list, it is moved to the tail. This is done under the table
 * spinlock.
 */
static inline file_context_common_node_t *lookup_common_node(file_context_common_table_t *table, uint64_t key, uint8_t hashbits)
{
    file_context_ht_node_t *hit;
    file_context_common_node_t *common;

    /* RCU READER */
    rcu_read_lock();
    hit = find_hash_node_rcu(table->hashtable, key, hashbits);
    if (hit && !get_ht_node_rcu(hit))
        hit = NULL;
    rcu_read_unlock();
    /* RCU READER */

    if (!hit)
        return NULL;

    common = &hit->common;
    spin_lock(&table->spinlock);
    common->last_access_time = jiffies;
    if (common->lru_list_node_inserted) {
        list_del(&common->lru_list_node);
        list_add_tail(&common->lru_list_node, &table->lru_list);
    }
    spin_unlock(&table->spinlock);

    return common;
}
/*
 * Binary-search the rb-tree for key and return the matching rb_node, or
 * NULL. Must be called under the tree lock.
 */
static struct rb_node* lookup_tree_rbnode(struct rb_root* tree, uint64_t key)
{
    struct rb_node *cursor = tree->rb_node;

    while (cursor) {
        file_context_tree_node_t *candidate = container_of(cursor, file_context_tree_node_t, node);

        if (key < candidate->common.key)
            cursor = cursor->rb_left;
        else if (key > candidate->common.key)
            cursor = cursor->rb_right;
        else
            return &candidate->node;
    }

    return NULL;
}
/*
 * Look up key in the tree under the tree spinlock. On a hit a reference is
 * taken and the node's common part is returned; otherwise NULL.
 */
static file_context_common_node_t *lookup_tree_node(file_context_tree_t *tree, uint64_t key)
{
    file_context_tree_node_t *hit = NULL;
    struct rb_node *rb;

    spin_lock(&tree->spinlock);
    rb = lookup_tree_rbnode(&tree->tree, key);
    if (rb) {
        hit = container_of(rb, file_context_tree_node_t, node);
        get_tree_node(hit);
    }
    spin_unlock(&tree->spinlock);

    if (!hit)
        return NULL;
    return &hit->common;
}
/* Outcome of erase_tree_node(). */
typedef struct erase_tree_result {
/* Tree size right after the removal; left at 0 when nothing was erased. */
int size;
/* True when a node with the requested key was found and removed. */
bool erased;
} erase_tree_result_t;
/*
 * Remove the node stored under key from the tree, if present.
 *
 * The unlinking happens under the tree spinlock; the node is put after the
 * lock is released. The result reports whether a node was erased and the
 * tree size right after the removal.
 */
static erase_tree_result_t erase_tree_node(file_context_tree_t *tree, uint64_t key)
{
    erase_tree_result_t outcome = {0};
    file_context_tree_node_t *victim = NULL;
    struct rb_node *rb;

    spin_lock(&tree->spinlock);
    rb = lookup_tree_rbnode(&tree->tree, key);
    if (rb) {
        victim = container_of(rb, file_context_tree_node_t, node);
        remove_tree_node(tree, victim);
        outcome.size = tree->size;
        outcome.erased = true;
    }
    spin_unlock(&tree->spinlock);

    if (victim)
        put_tree_node(victim);

    return outcome;
}
/*
 * Look up `key` in the `type` table of every active transport in `ids`.
 *
 * common_nodes must have MAX_TRANSPORT_SIZE slots. Every slot is written:
 * a referenced node on a hit, NULL otherwise. This includes slots whose
 * transport id is 0 (inactive), so callers need not pre-initialise the
 * array before scanning all of it.
 *
 * *found_all is set to false as soon as one active transport has no table
 * or no node for the key; inactive slots do not affect it.
 */
static void lookup_common_node_all(file_context_table_type_t type
                                   , uint64_t key
                                   , file_context_common_node_t **common_nodes
                                   , const transport_ids_t* ids
                                   , bool* found_all)
{
    int idx = 0;

    *found_all = true;
    for (; idx < MAX_TRANSPORT_SIZE; idx++)
    {
        transport_id_t transport_id = ids->ids[idx];
        file_context_big_table_t *table;

        /* Clear the slot first so skipped transports never expose stale data. */
        common_nodes[idx] = NULL;
        if (!transport_id)
            continue;

        table = get_file_context_big_table(transport_id, type);
        if (table)
        {
            file_context_common_node_t* node = lookup_common_node(&table->common_table, key, table->common_table.hashbits);
            if (node) {
                common_nodes[idx] = node;
            } else {
                *found_all = false;
            }
            put_file_context_big_table(table);
        } else {
            *found_all = false;
        }
    }
}
/*
 * Remove the node stored under key from the table.
 *
 * Under the table spinlock the node is taken off the LRU list (if it is on
 * it) and out of the hash table; the reference is dropped after unlocking.
 * Returns 0 if a node was removed, -ENOENT if the key was not found.
 */
static int remove_common_node_by_key(file_context_common_table_t *common_table, uint64_t key)
{
    file_context_ht_node_t *victim;

    /* RCU WRITER */
    spin_lock(&common_table->spinlock);
    victim = find_hash_node(common_table->hashtable, key, common_table->hashbits);
    if (victim) {
        if (victim->common.lru_list_node_inserted) {
            list_del(&victim->common.lru_list_node);
            victim->common.lru_list_node_inserted = false;
        }
        hash_del_rcu(&victim->node);
        common_table->size--;
    }
    spin_unlock(&common_table->spinlock);
    /* RCU WRITER */

    if (!victim)
        return -ENOENT;

    put_ht_node(victim);
    return 0;
}
// This function takes common_table->spinlock itself, so the caller must NOT hold it.
// This function is currently specialized for "modify close" cache, you likely do not need to use it elsewhere.
// The node is unlinked from the LRU list if it is still on it and from the hash table if it is
// still hashed; the reference is dropped only in the latter case.
static void remove_common_node(file_context_common_table_t *common_table, file_context_common_node_t* common_node)
{
    file_context_ht_node_t *ht_node = container_of(common_node, file_context_ht_node_t, common);
    bool still_hashed;

    spin_lock(&common_table->spinlock);
    if (common_node->lru_list_node_inserted) {
        list_del(&common_node->lru_list_node);
        common_node->lru_list_node_inserted = false;
    }
    // This check is important because node might have been already removed due to LRU eviction
    still_hashed = !hlist_unhashed(&ht_node->node);
    if (still_hashed) {
        hash_del_rcu(&ht_node->node);
        common_table->size--;
    }
    spin_unlock(&common_table->spinlock);

    if (still_hashed)
        put_ht_node(ht_node);
}
/*
 * Remove `key` from the `type` table found in slot `idx` (the transport id
 * is not verified).
 * Returns 0 when a node was removed, -ENOENT when the slot has no such table
 * or the key is not present.
 */
static int remove_common_cache(int idx, uint64_t key, file_context_table_type_t type)
{
    file_context_big_table_t *table = get_file_context_big_table_by_idx(idx, type);
    int ret;

    if (!table)
        return -ENOENT;

    ret = remove_common_node_by_key(&table->common_table, key);
    if (!ret) {
        DPRINTF("remove_common_cache[%d]: %llu", idx, key);
    }

    put_file_context_big_table(table);
    return ret;
}
/**
 * Drop every cached entry keyed by file_key->ptr: from the open, read and
 * write tables of every transport slot and from the close-modified table.
 * Missing tables or keys are silently ignored.
 */
void remove_common_cache_all(const file_key_t* file_key)
{
    const uint64_t key = file_key->ptr;
    file_context_big_table_t *close_modified;
    int slot;

    for (slot = 0; slot < MAX_TRANSPORT_SIZE; slot++) {
        remove_common_cache(slot, key, FILE_CONTEXT_OPEN_TABLE);
        remove_common_cache(slot, key, FILE_CONTEXT_READ_TABLE);
        remove_common_cache(slot, key, FILE_CONTEXT_WRITE_TABLE);
    }

    close_modified = get_file_context_close_modified_table();
    if (!close_modified)
        return;

    remove_common_node_by_key(&close_modified->common_table, key);
    put_file_context_big_table(close_modified);
}
static inline int cmp_file_context_key(const file_context_key_t *cache_key, const file_context_key_t *current_key)
{
if (cache_key->file_key.ptr != current_key->file_key.ptr ||
cache_key->file_key.ino != current_key->file_key.ino ||
cache_key->file_key.gen != current_key->file_key.gen ||
cache_key->file_key.dev != current_key->file_key.dev)
{
return -1;
}
return 0;
}
static inline int cmp_file_context_update_time(const file_context_key_t *cache_key, const file_context_key_t *current_key)
{
if (cache_key->i_mtime.tv_sec != current_key->i_mtime.tv_sec ||
cache_key->i_mtime.tv_nsec != current_key->i_mtime.tv_nsec ||
cache_key->i_ctime.tv_sec != current_key->i_ctime.tv_sec ||
cache_key->i_ctime.tv_nsec != current_key->i_ctime.tv_nsec)
{
return -1;
}
return 0;
}
/*
 * Full key comparison: file identity first, then update times.
 * Returns 0 when both match, -1 otherwise.
 */
static inline int cmp_file_context(const file_context_key_t *cache_key, const file_context_key_t *current_key)
{
    if (cmp_file_context_key(cache_key, current_key) != 0)
        return -1;
    if (cmp_file_context_update_time(cache_key, current_key) != 0)
        return -1;
    return 0;
}
/* True while the current jiffies value is still before the deadline. */
static bool deadline_valid(unsigned long deadline)
{
    return time_before(jiffies, deadline);
}
/* True when every bit set in expected_flags is also set in having_flags. */
static bool flags_valid(int expected_flags, int having_flags)
{
    int missing = expected_flags & ~having_flags;

    return missing == 0;
}
/* Recover the enclosing open-file node from its embedded common node. */
static inline file_context_open_file_node_t *to_open_file_node(file_context_common_node_t *common_node)
{
    file_context_open_file_node_t *file_node;

    file_node = container_of(common_node, file_context_open_file_node_t, node.common);
    return file_node;
}
/* Drop one reference on an open-file node (via its hash-table node). */
static inline void put_open_file_node(file_context_open_file_node_t *file_node)
{
    file_context_ht_node_t *ht_node = &file_node->node;

    put_ht_node(ht_node);
}
/* Recover the enclosing open-process node from its embedded common node. */
static inline file_context_open_process_node_t *to_open_process_node(file_context_common_node_t *common_node)
{
    file_context_open_process_node_t *process_node;

    process_node = container_of(common_node, file_context_open_process_node_t, node.common);
    return process_node;
}
/* Drop one reference on an open-process node (via its tree node). */
static inline void put_open_process_node(file_context_open_process_node_t *process_node)
{
    file_context_tree_node_t *tree_node = &process_node->node;

    put_tree_node(tree_node);
}
bool check_open_cache(const transport_ids_t* ids, file_context_info_t *info)
{
file_context_common_node_t *common_nodes[MAX_TRANSPORT_SIZE] = {0};
bool skip;
int id = 0;
// skip is mapped into 'found_all'. if not all were found, skip is false
lookup_common_node_all(FILE_CONTEXT_OPEN_TABLE, info->msg_info.key.file_key.ptr, common_nodes, ids, &skip);
for (id = 0; id < MAX_TRANSPORT_SIZE; id++)
{
file_context_open_file_node_t *file_node = NULL;
file_context_open_process_node_t *process_node = NULL;
file_context_common_node_t *common_node = NULL;
if (common_nodes[id] == NULL)
{
continue;
}
file_node = to_open_file_node(common_nodes[id]);
// for open events caching, make sure that both file_ptr key and times match
if (cmp_file_context(&file_node->key, &info->msg_info.key) != 0)
{
skip = false;
put_open_file_node(file_node);
continue;
}
if (deadline_valid(file_node->data.deadline) && flags_valid(info->params.open.flags, atomic_read(&file_node->data.flags)))
{
info->msg_info.skipped_transport_ids[id] = ids->ids[id];
put_open_file_node(file_node);
continue;
}
common_node = lookup_tree_node(&file_node->process_lookup, info->pid_key);
put_open_file_node(file_node);
if (!common_node)
{
skip = false;
continue;
}
process_node = to_open_process_node(common_node);
if (deadline_valid(process_node->data.deadline) && flags_valid(info->params.open.flags, atomic_read(&process_node->data.flags)))
{
info->msg_info.skipped_transport_ids[id] = ids->ids[id];
}
else
{
skip = false;
}
put_open_process_node(process_node);
}
return skip;
}
/* Tree free callback: return an open-process node to its struct cache. */
static void open_process_node_free(file_context_tree_node_t* node)
{
    file_context_open_process_node_t *owner = container_of(node, file_context_open_process_node_t, node);

    KMEM_DELETE(file_context_open_process_node, owner);
}
/*
 * Allocate an open-process node with empty flags and a zero deadline and
 * insert it into tree under pid_key.
 *
 * The node is created with one reference and a second one is taken before
 * the insert, so the returned pointer carries a reference for the caller.
 * Returns NULL if the allocation fails.
 */
static inline file_context_open_process_node_t *add_open_process_node(file_context_tree_t *tree,
                                                                      uint64_t pid_key)
{
    file_context_open_process_node_t *fresh = KMEM_NEW0(file_context_open_process_node);

    if (fresh) {
        init_tree_node(&fresh->node, open_process_node_free);
        get_tree_node(&fresh->node);
        atomic_set(&fresh->data.flags, 0);
        WRITE_ONCE(fresh->data.deadline, 0);
        insert_tree_node(tree, &fresh->node, pid_key);
    }

    return fresh;
}
/*
 * RCU free callback for an open-file node: empties its per-process tree and
 * returns the node to its struct cache.
 */
static void file_context_open_file_node_free(struct rcu_head *rcu)
{
    file_context_open_file_node_t *owner = container_of(rcu, file_context_open_file_node_t, node.rcu);

    clear_tree_nolock(&owner->process_lookup);
    KMEM_DELETE(file_context_open_file_node, owner);
}
/*
 * Allocate an open-file node for `key` with empty flags, a zero deadline and
 * an empty per-process tree, and insert it into table under
 * key->file_key.ptr.
 *
 * The node is created with one reference and a second one is taken before
 * the insert, so the returned pointer carries a reference for the caller.
 * Returns NULL if the allocation fails.
 */
static inline file_context_open_file_node_t *add_open_file_node(file_context_common_table_t *table,
                                                                const file_context_key_t *key)
{
    file_context_open_file_node_t *fresh = KMEM_NEW0(file_context_open_file_node);

    if (!fresh)
        return NULL;

    init_ht_node(&fresh->node, file_context_open_file_node_free);
    get_ht_node(&fresh->node);

    fresh->key = *key;
    atomic_set(&fresh->data.flags, 0);
    WRITE_ONCE(fresh->data.deadline, 0);
    init_file_context_tree(&fresh->process_lookup, FILE_CONTEXT_PROCESS_TREE_MAX_SIZE, FILE_CONTEXT_PROCESS_TREE_LRU_CLEAN_SIZE);

    insert_ht_node(table, &fresh->node, key->file_key.ptr);
    return fresh;
}
/*
 * Find or create the open-cache nodes for a file and, optionally, a process.
 *
 * *pfile_node receives the file node for `key`, with a reference held. An
 * existing node is reused only when its full key (identity and times)
 * matches; otherwise a new node is inserted. It is NULL if allocation
 * failed.
 *
 * If pprocess_node is non-NULL, *pprocess_node receives the per-process node
 * for pid_key under that file node, with a reference held. It is NULL when
 * either the file node or the process node could not be allocated. The
 * process lookup is skipped when there is no file node, because its
 * process_lookup tree cannot be used through a NULL pointer.
 */
static void add_open_node(file_context_big_table_t *table,
                          const file_context_key_t *key, uint64_t pid_key,
                          file_context_open_file_node_t **pfile_node,
                          file_context_open_process_node_t **pprocess_node)
{
    file_context_open_file_node_t *tmp_file_node = NULL;
    file_context_common_node_t *common_node;

    // Create or find the open node per file key...
    common_node = lookup_common_node(&table->common_table, key->file_key.ptr, table->common_table.hashbits);
    if (common_node)
    {
        tmp_file_node = to_open_file_node(common_node);
        // If file_key mismatches, reinsert the node so 'forget' that we found a common_node
        if (cmp_file_context(&tmp_file_node->key, key) != 0)
        {
            put_open_file_node(tmp_file_node);
            tmp_file_node = NULL;
        }
    }
    if (!tmp_file_node)
    {
        tmp_file_node = add_open_file_node(&table->common_table, key);
    }
    *pfile_node = tmp_file_node;

    // ...and, if asked, process node by pid_key
    if (!pprocess_node)
    {
        return;
    }
    *pprocess_node = NULL;
    if (!tmp_file_node)
    {
        // File node allocation failed: there is no tree to search or insert into.
        return;
    }

    common_node = lookup_tree_node(&tmp_file_node->process_lookup, pid_key);
    if (common_node)
    {
        *pprocess_node = to_open_process_node(common_node);
    }
    else
    {
        *pprocess_node = add_open_process_node(&tmp_file_node->process_lookup, pid_key);
    }
}
/*
 * Convert a TTL in seconds into an absolute deadline in jiffies.
 *
 * A TTL of 0 means "valid forever" and maps to
 * FILE_CONTEXT_LONGEST_EXPIRE_TIME_MS from now.
 *
 * The seconds-to-milliseconds conversion is saturated instead of being
 * allowed to wrap in 32 bits: a TTL above ~49.7 days would otherwise wrap
 * to a very short deadline.
 * NOTE(review): the saturated value relies on msecs_to_jiffies() treating
 * an input with the top bit set as the maximum timeout - confirm for the
 * KERNEL_MOCK build.
 */
static unsigned long to_deadline(uint32_t ttl_s)
{
    /* Largest TTL whose millisecond value still fits in 32 bits. */
    const uint32_t max_ttl_s = (uint32_t)~0U / 1000;
    unsigned long cur_jiffies = jiffies;
    unsigned int ttl_ms;

    // If ttl_s is 0, it means that the entry should be valid forever so set it to some very big time
    if (ttl_s == 0)
        return cur_jiffies + msecs_to_jiffies(FILE_CONTEXT_LONGEST_EXPIRE_TIME_MS);

    if (ttl_s > max_ttl_s)
        ttl_ms = ~0U;
    else
        ttl_ms = ttl_s * 1000;

    return cur_jiffies + msecs_to_jiffies(ttl_ms);
}
/**
 * Record an "open" cache entry for transport `id`.
 *
 * With a non-zero info->pid_key the entry is stored on the per-process node,
 * otherwise on the file node. In both cases the requested open flags are
 * OR-ed into the node's flags and its deadline is replaced by one derived
 * from info->ttl_s.
 *
 * @return 0 on success, -ENOENT if the transport has no open table,
 *         -ENOMEM if the node to update could not be obtained.
 */
int add_open_cache(transport_id_t id, const file_context_add_cache_request_t* info)
{
    file_context_open_process_node_t *process_node = NULL;
    file_context_open_file_node_t *file_node = NULL;
    file_context_big_table_t *table;
    unsigned long deadline;
    int ret = 0;

    table = get_file_context_big_table(id, FILE_CONTEXT_OPEN_TABLE);
    if (!table)
        return -ENOENT;

    deadline = to_deadline(info->ttl_s);
    add_open_node(table, &info->key, info->pid_key, &file_node, info->pid_key ? &process_node : NULL);
    put_file_context_big_table(table);

    if (info->pid_key && process_node) {
        atomic_or_compat(info->params.open.flags, &process_node->data.flags);
        WRITE_ONCE(process_node->data.deadline, deadline);
    } else if (!info->pid_key && file_node) {
        atomic_or_compat(info->params.open.flags, &file_node->data.flags);
        WRITE_ONCE(file_node->data.deadline, deadline);
    } else {
        /* The node this request targets could not be created. */
        ret = -ENOMEM;
    }

    if (file_node)
        put_open_file_node(file_node);
    if (process_node)
        put_open_process_node(process_node);

    return ret;
}
/*
 * Allocate an interval node covering [low, high] and account for it in set
 * (interval_count, plus total_interval_size in debug builds). The node is
 * not linked into the tree here.
 * Returns NULL if the allocation fails, in which case set is untouched.
 * This function requires lock.
 */
static interval_node_t *malloc_interval_node(uint64_t low, uint64_t high, interval_set_t *set)
{
    interval_node_t *fresh = KMEM_NEW(interval_node);

    if (fresh) {
        RB_CLEAR_NODE(&fresh->rb);
        fresh->low = low;
        fresh->high = high;
        set->interval_count++;
#ifdef INTERVAL_SET_DEBUG
        set->total_interval_size += (fresh->high - fresh->low);
#endif
    }

    return fresh;
}
/*
 * Erase the interval node embedding rb_node from set's tree, update the
 * set's counters and free the node. A NULL rb_node is a no-op.
 * This function requires lock.
 */
static void remove_interval_node(struct rb_node *rb_node, interval_set_t *set)
{
    if (rb_node) {
        interval_node_t *victim = rb_entry(rb_node, interval_node_t, rb);

        set->interval_count--;
#ifdef INTERVAL_SET_DEBUG
        set->total_interval_size -= (victim->high - victim->low);
#endif
        rb_erase(rb_node, &set->root);
        KMEM_DELETE(interval_node, victim);
    }
}
/* This function requires lock
 Remove and free every interval of 'set' by repeatedly deleting the current root
 until the tree is empty.
*/
void clean_interval_tree(interval_set_t *set)
{
    struct rb_node *top;

    for (top = set->root.rb_node; top != NULL; top = set->root.rb_node)
    {
        remove_interval_node(top, set);
    }
}
// Returns true when the interval stored in 'node' fully covers (low, high);
// equal bounds count as covered.
static bool contain(interval_node_t *node, uint64_t low, uint64_t high)
{
    return (node->low <= low) && (high <= node->high);
}
// Fallback definition, used only when list_last_entry is not already defined:
// yields the entry of the given 'type' that embeds the last element ((ptr)->prev)
// of the list headed by 'ptr', where 'member' is the list_head field inside 'type'.
#ifndef list_last_entry
#define list_last_entry(ptr, type, member) \
list_entry((ptr)->prev, type, member)
#endif
/* This function requires lock
Walk the interval tree looking for nodes that overlap the new interval (*low, *high).
The walk is iterative: nodes still to be visited are kept on a local LIFO stack that is
linked through interval_node_t::stack_node (push with list_add_tail, pop the last entry).
The tree itself is not modified here; overlapped nodes are only queued on 'del_list'
(through interval_node_t::del_list_node) so that the caller can erase them afterwards.

low, high - in/out: bounds of the new interval; they are widened to the bounds of every
            partially overlapped node, so on return they describe the merged interval
root      - root of the interval tree to search
del_list  - out: nodes that are overlapped by the (merged) new interval
Returns true when an existing node already contains the new interval (nothing has to be
inserted), false otherwise (including the case of an empty tree).

For each node and new node, possible situation:
1. node contains new node, return true
2. new node contains node, remove overlapped node, check left and right
3. new node is less/greater than node, check left/right
4. new node is left/right overlapped with node, extend new node's low/high, remove node, check left/right
*/
static bool check_overlap(uint64_t *low, uint64_t *high, struct rb_root *root, struct list_head *del_list)
{
interval_node_t *cur, *next;
struct list_head stack;
struct rb_node *rb_node;
INIT_LIST_HEAD(&stack);
rb_node = root->rb_node;
// Empty tree: nothing can overlap
if (!rb_node)
{
return false;
}
// Start the walk from the root
cur = rb_entry(rb_node, interval_node_t, rb);
list_add_tail(&cur->stack_node, &stack);
while (!list_empty(&stack))
{
// Pop the most recently pushed node
cur = list_last_entry(&stack, interval_node_t, stack_node);
list_del(&cur->stack_node);
// assume that s0 is current node, s1 is new node
// current node contains new node
/*
tree:
(14,20)
(4,7) (21,22)
(1,3) (9,13)
stack: (14,20)
new node: (17,18)
(17,18) is contained by (14,20)
____s0---s1=s1--s0____
14 17 18 20
do nothing, return true
*/
if (contain(cur, *low, *high))
{
return true;
}
// new node contains current node (strictly on both sides):
// the current node is queued for deletion and both subtrees are visited.
// The right child is pushed first, so the left child is popped (visited) first.
/*
tree:
(14,17)
(4,7) (18,19)
(1,3) (9,13)
stack: (14,17)
new node: (10,20)
(10,20) contains (14,17)
____s1====s0---s0===s1____
10 14 17 20
after operation:
deleted list: (14,17)
stack: (18,19), (4,7)
*/
if ((*low < cur->low) && (*high > cur->high))
{
list_add_tail(&cur->del_list_node, del_list);
if (cur->rb.rb_right)
{
next = rb_entry(cur->rb.rb_right, interval_node_t, rb);
list_add_tail(&next->stack_node, &stack);
}
if (cur->rb.rb_left)
{
next = rb_entry(cur->rb.rb_left, interval_node_t, rb);
list_add_tail(&next->stack_node, &stack);
}
continue;
}
// new node is less than current node: only the left subtree can overlap
/*
tree:
(4,7)
(1,3) (9,13)
stack: (4,7)
new node: (0,2)
(0,2) is less than (4,7)
____s1==s1__s0---s0____
0 2 4 7
after operation:
deleted list:
stack: (1,3)
*/
if (*high < cur->low)
{
if (cur->rb.rb_left)
{
next = rb_entry(cur->rb.rb_left, interval_node_t, rb);
list_add_tail(&next->stack_node, &stack);
}
continue;
}
// new node is left overlapped with current node:
// its upper bound lies inside the current node, so the new interval is extended
// up to cur->high, the current node is queued for deletion and only the left
// subtree still has to be checked.
/*
tree:
(4,8)
(1,3) (9,13)
stack: (4,8)
new node: (2,6)
(2,6) is left overlapped with (4,8)
____s1==s0xxs1--s0____
2 4 6 8
after operation:
new node->(2, 8)
deleted list: (4,8)
stack: (1,3)
*/
else if (*high <= cur->high)
{
list_add_tail(&cur->del_list_node, del_list);
*high = cur->high;
if (cur->rb.rb_left)
{
next = rb_entry(cur->rb.rb_left, interval_node_t, rb);
list_add_tail(&next->stack_node, &stack);
}
continue;
}
// new node is greater than node: only the right subtree can overlap
/*
tree:
(10,17)
(4,7) (18,19)
stack: (10,17)
new node: (20,30)
(20,30) is greater than (10,17)
____s0-------s0___s1=========s1____
10 17 20 30
after operation:
deleted list:
stack: (18,19)
*/
if (*low > cur->high)
{
if (cur->rb.rb_right)
{
next = rb_entry(cur->rb.rb_right, interval_node_t, rb);
list_add_tail(&next->stack_node, &stack);
}
continue;
}
// new node is right overlapped with node:
// its lower bound lies inside the current node, so the new interval is extended
// down to cur->low, the current node is queued for deletion and only the right
// subtree still has to be checked.
/*
tree:
(10,17)
(4,7) (18,19)
stack: (10,17)
new node: (15,30)
(15,30) is right overlapped with (10,17)
s1
____s0-----s1xxs0=========s1____
10 15 17 30
after operation:
new node->(10, 30)
deleted list:(10,17)
stack: (18,19)
*/
else if (cur->low <= *low)
{
list_add_tail(&cur->del_list_node, del_list);
*low = cur->low;
if (cur->rb.rb_right)
{
next = rb_entry(cur->rb.rb_right, interval_node_t, rb);
list_add_tail(&next->stack_node, &stack);
}
continue;
}
// None of the cases above matched; this is reported and the walk simply continues
EPRINTF("Something wrong");
}
return false;
}
/* This function requires lock
 Add the interval (low, high) to 'set', merging it with every interval it overlaps:
 1. find the overlapped intervals (check_overlap widens the bounds accordingly)
 2. allocate the merged node
 3. remove the overlapped intervals
 4. insert the merged interval
 Returns true when the interval is covered by the set afterwards (either it already
 was, or it has been inserted); false when the new node could not be allocated, in
 which case the tree is left unchanged.
*/
bool insert_interval(uint64_t low, uint64_t high, interval_set_t *set)
{
    struct list_head obsolete;
    struct rb_node **link;
    struct rb_node *parent = NULL;
    interval_node_t *merged;
    uint64_t merged_low = low;
    uint64_t merged_high = high;

    INIT_LIST_HEAD(&obsolete);
    if (check_overlap(&merged_low, &merged_high, &set->root, &obsolete))
    {
        // Already covered by an existing interval
        return true;
    }

    // Allocate before touching the tree so that a failure leaves it intact
    merged = malloc_interval_node(merged_low, merged_high, set);
    if (!merged)
    {
        return false;
    }

    // Nodes were queued in visiting order (parents before children);
    // taking them from the tail erases the most recently visited ones first
    while (!list_empty(&obsolete))
    {
        interval_node_t *victim = list_last_entry(&obsolete, interval_node_t, del_list_node);

        list_del(&victim->del_list_node);
        remove_interval_node(&victim->rb, set);
    }

    // Descend to the leaf position where the merged interval belongs
    for (link = &set->root.rb_node; *link != NULL; )
    {
        interval_node_t *at;

        parent = *link;
        at = rb_entry(parent, interval_node_t, rb);
        link = (merged->high < at->low) ? &parent->rb_left : &parent->rb_right;
    }

    rb_link_node(&merged->rb, parent, link);
    rb_insert_color(&merged->rb, &set->root);
    return true;
}
/* This function requires lock
 Tell whether (low, high) is completely covered by a single interval of 'set'.
 1. a node contains the interval: return true
 2. the interval lies entirely below/above the node: continue in the left/right subtree
 3. the interval only partially overlaps the node (or contains it): return false
 Returns false as well when the search runs off the tree.
*/
bool check_interval(uint64_t low, uint64_t high, interval_set_t *set)
{
    struct rb_node *pos = set->root.rb_node;

    while (pos)
    {
        interval_node_t *entry = rb_entry(pos, interval_node_t, rb);

        if (contain(entry, low, high))
        {
            return true;
        }
        if (high >= entry->low && low <= entry->high)
        {
            // overlapped
            return false;
        }
        pos = (high < entry->low) ? pos->rb_left : pos->rb_right;
    }
    return false;
}
// Result of try_round_interval(): the requested interval after its bounds were passed
// through FILE_CONTEXT_CHUNK_LOWER_BOUND / FILE_CONTEXT_CHUNK_UPPER_BOUND.
typedef struct rounded_interval_s
{
uint64_t low; // lower bound, produced by FILE_CONTEXT_CHUNK_LOWER_BOUND
uint64_t high; // upper bound, produced by FILE_CONTEXT_CHUNK_UPPER_BOUND
} rounded_interval_t;
/*
 Validate the interval (low, high) against 'file_size' and round it to chunk bounds.
 The interval is rejected (false is returned, 'out' is not written) when
   - file_size is not positive or exceeds FILE_CONTEXT_MAX_FILE_SIZE,
   - low lies beyond file_size,
   - high is smaller than low.
 Otherwise 'out' receives the rounded bounds and true is returned.
*/
static bool try_round_interval(uint64_t low, uint64_t high, loff_t file_size, rounded_interval_t* out)
{
    const bool size_ok = (file_size > 0) && !(file_size > FILE_CONTEXT_MAX_FILE_SIZE);
    const bool range_ok = !(low > (uint64_t)file_size) && !(high < low);

    if (size_ok && range_ok)
    {
        out->low = FILE_CONTEXT_CHUNK_LOWER_BOUND(low);
        out->high = FILE_CONTEXT_CHUNK_UPPER_BOUND(high);
        return true;
    }
    return false;
}
// Map a common node back to the read/write node that embeds it (as node.common).
static file_context_rw_node_t* to_rw_node(file_context_common_node_t *common_node)
{
    file_context_rw_node_t *owner = container_of(common_node, file_context_rw_node_t, node.common);

    return owner;
}
// Drop one reference to a read/write node by releasing its embedded hash table node.
static void put_rw_node(file_context_rw_node_t *rw_node)
{
    file_context_ht_node_t *ht_node = &rw_node->node;

    put_ht_node(ht_node);
}
static file_context_rw_node_t* add_rw_cache_node(transport_id_t id, const file_context_key_t* key, file_context_table_type_t type);
/*
 Send read events only the first time the file is fully read.
 The interval will be reset when the file is changed.

 For every transport slot the read interval is merged into that transport's read node
 (the node is created when it is missing or belongs to a different file key).
 A slot is un-skipped (skipped_transport_ids[slot] set to 0) only when the whole range
 (0, file_size) is covered and the node had not been reported before.

 Skip event when return is true.
*/
bool check_and_update_read_cache(const transport_ids_t* ids, file_context_info_t *info)
{
    file_context_common_node_t *found[MAX_TRANSPORT_SIZE] = {0};
    rounded_interval_t chunked;
    bool found_all = false;
    bool skip_event = true;
    int slot;
    // TODO: This cast is unsafe, ask to explicitly provide file_size instead
    loff_t file_size = i_size_read((const struct inode *)info->msg_info.key.file_key.ptr);

    if (!try_round_interval(info->params.rw.low, info->params.rw.high, file_size, &chunked))
    {
        return true;
    }

    lookup_common_node_all(FILE_CONTEXT_READ_TABLE, info->msg_info.key.file_key.ptr, found, ids, &found_all);

    for (slot = 0; slot < MAX_TRANSPORT_SIZE; slot++)
    {
        const transport_id_t transport_id = ids->ids[slot];
        file_context_rw_node_t *rw = found[slot] ? to_rw_node(found[slot]) : NULL;

        // By default skip the read event
        info->msg_info.skipped_transport_ids[slot] = transport_id;

        // A node that belongs to another file key is released and replaced
        if (rw && cmp_file_context_key(&rw->key, &info->msg_info.key) != 0)
        {
            put_rw_node(rw);
            rw = NULL;
        }
        if (!rw)
        {
            rw = add_rw_cache_node(transport_id, &info->msg_info.key, FILE_CONTEXT_READ_TABLE);
        }
        if (!rw)
        {
            continue;
        }

        spin_lock(&rw->data.spinlock);
        insert_interval(chunked.low, chunked.high, &rw->data.interval_set);
        if (check_interval(0, file_size, &rw->data.interval_set)
            && atomic_cmpxchg(&rw->data.is_reported, false, true) == false)
        {
            // send full read event by setting this flag
            info->msg_info.skipped_transport_ids[slot] = 0;
            skip_event = false;
        }
        spin_unlock(&rw->data.spinlock);
        put_rw_node(rw);
    }
    return skip_event;
}
/*
 Decide whether a write event can be skipped.
 Skip event when return is true.

 Files larger than FILE_CONTEXT_MAX_FILE_SIZE are always skipped.
 Otherwise, for every transport that has a write node for this file pointer:
   - when the node's key matches and its interval set already contains the written
     range, the transport is recorded in info->msg_info.skipped_transport_ids;
   - when the key differs or the range is not covered, the event must not be skipped.
 The initial verdict comes from lookup_common_node_all() through its last argument.
*/
bool check_write_cache(const transport_ids_t* ids, file_context_info_t *info)
{
    file_context_common_node_t *common_nodes[MAX_TRANSPORT_SIZE] = {0};
    // Initialized so that the function never returns an indeterminate value should
    // lookup_common_node_all() leave the flag untouched; "do not skip" is the safe default.
    bool skip = false;
    int id = 0;
    // TODO: This cast is unsafe, ask to explicitly provide file_size instead
    loff_t file_size = i_size_read((const struct inode *)info->msg_info.key.file_key.ptr);

    if (file_size > FILE_CONTEXT_MAX_FILE_SIZE)
    {
        // Currently we are not handling any files larger than FILE_CONTEXT_MAX_FILE_SIZE
        return true;
    }

    lookup_common_node_all(FILE_CONTEXT_WRITE_TABLE, info->msg_info.key.file_key.ptr, common_nodes, ids, &skip);

    for (id = 0; id < MAX_TRANSPORT_SIZE; id++)
    {
        file_context_rw_node_t *write_node = NULL;

        if (common_nodes[id] == NULL)
        {
            continue;
        }

        write_node = to_rw_node(common_nodes[id]);
        if (cmp_file_context_key(&write_node->key, &info->msg_info.key) == 0)
        {
            spin_lock(&write_node->data.spinlock);
            if (check_interval(info->params.rw.low, info->params.rw.high, &write_node->data.interval_set))
            {
                info->msg_info.skipped_transport_ids[id] = ids->ids[id];
            }
            else
            {
                skip = false;
            }
            spin_unlock(&write_node->data.spinlock);
        }
        else
        {
            // Cached node describes a different file key
            skip = false;
        }
        put_rw_node(write_node);
    }
    return skip;
}
static void rw_node_free(struct rcu_head *rcu)
{
file_context_rw_node_t *rw_node = container_of(rcu, file_context_rw_node_t, node.rcu);
clean_interval_tree(&rw_node->data.interval_set);
KMEM_DELETE(file_context_rw_node, rw_node);
}
/*
 Return the read/write node for 'key' in 'table', creating and inserting it when the
 table has no node for the file pointer or the stored key differs.
 The returned node carries a reference that the caller releases with put_rw_node().
 Returns NULL when a new node could not be allocated.
*/
static file_context_rw_node_t *add_rw_node(file_context_big_table_t* table, const file_context_key_t* key)
{
    file_context_rw_node_t *fresh;
    file_context_ht_node_t *ht_node;
    file_context_common_node_t *hit = lookup_common_node(&table->common_table, key->file_key.ptr, table->common_table.hashbits);

    if (hit)
    {
        file_context_rw_node_t *existing = to_rw_node(hit);

        // For 'read' and 'write' events, time changes are not important.
        // It is the fact that particular region was accessed that matters
        if (cmp_file_context_key(&existing->key, key) == 0)
        {
            return existing;
        }
        put_rw_node(existing);
    }

    fresh = KMEM_NEW0(file_context_rw_node);
    if (!fresh)
    {
        return NULL;
    }

    ht_node = &fresh->node;
    init_ht_node(ht_node, rw_node_free);
    // Extra reference for the caller, taken before the node is inserted
    get_ht_node(ht_node);
    fresh->key = *key;
    spin_lock_init(&fresh->data.spinlock);
    fresh->data.interval_set.root = RB_ROOT;
    insert_ht_node(&table->common_table, ht_node, key->file_key.ptr);
    return fresh;
}
// Look up the table of the given 'type' for transport 'id' and return (creating it if
// needed) the read/write node for 'key'. Returns NULL when the table does not exist
// or the node could not be obtained.
static file_context_rw_node_t* add_rw_cache_node(transport_id_t id, const file_context_key_t* key, file_context_table_type_t type)
{
    file_context_rw_node_t *rw = NULL;
    file_context_big_table_t *big_table = get_file_context_big_table(id, type);

    if (big_table)
    {
        rw = add_rw_node(big_table, key);
        put_file_context_big_table(big_table);
    }
    return rw;
}
/*
 Record a written range of a file in the write cache of transport 'id'.
 The range from info->params.rw is validated and rounded to chunk bounds first.
 Returns 0 on success,
         -EINVAL when the range is rejected by try_round_interval(),
         -ENOMEM when the write node is unavailable or the interval could not be stored
                 (insert_interval() fails only when its node allocation fails).
*/
int add_write_cache(transport_id_t id, const file_context_add_cache_request_t* info)
{
    // TODO: This cast is unsafe, ask to explicitly provide file_size instead
    loff_t file_size = i_size_read((const struct inode *)info->key.file_key.ptr);
    rounded_interval_t interval;
    file_context_rw_node_t *node = NULL;
    bool inserted;

    if (!try_round_interval(info->params.rw.low, info->params.rw.high, file_size, &interval))
    {
        return -EINVAL;
    }

    node = add_rw_cache_node(id, &info->key, FILE_CONTEXT_WRITE_TABLE);
    if (!node)
    {
        return -ENOMEM;
    }

    spin_lock(&node->data.spinlock);
    inserted = insert_interval(interval.low, interval.high, &node->data.interval_set);
    spin_unlock(&node->data.spinlock);
    put_rw_node(node);

    // Do not report success when nothing was cached
    return inserted ? 0 : -ENOMEM;
}
// Release callback of a process node (registered through init_tree_node):
// recovers the process node from its embedded tree node and frees it.
static void process_node_free(file_context_tree_node_t* node)
{
    file_context_process_node_t *owner;

    owner = container_of(node, file_context_process_node_t, node);
    KMEM_DELETE(file_context_process_node, owner);
}
// Map a common node back to the process node that embeds it (as node.common).
static file_context_process_node_t* to_process_node(file_context_common_node_t *common_node)
{
    file_context_process_node_t *owner = container_of(common_node, file_context_process_node_t, node.common);

    return owner;
}
/*
 Return the process node stored under 'pid_key' in 'tree'; when there is none, allocate
 a new node, take a reference for the caller and insert it into the tree.
 Returns NULL when the allocation fails.
*/
static inline file_context_process_node_t *lookup_or_add_process_node(file_context_tree_t *tree,
                                                                      uint64_t pid_key)
{
    file_context_process_node_t *created;
    file_context_common_node_t *hit = lookup_tree_node(tree, pid_key);

    if (hit)
    {
        return to_process_node(hit);
    }

    created = KMEM_NEW0(file_context_process_node);
    if (created)
    {
        init_tree_node(&created->node, process_node_free);
        get_tree_node(&created->node);
        insert_tree_node(tree, &created->node, pid_key);
    }
    return created;
}
static void file_modify_node_free(struct rcu_head *rcu)
{
file_context_file_modify_node_t *file_node = container_of(rcu, file_context_file_modify_node_t, node.rcu);
clear_tree_nolock(&file_node->process_lookup);
KMEM_DELETE(file_context_file_modify_node, file_node);
}
// Map a common node back to the file-modify node that embeds it (as node.common).
static inline file_context_file_modify_node_t* to_file_modify_node(file_context_common_node_t *common_node)
{
    file_context_file_modify_node_t *owner = container_of(common_node, file_context_file_modify_node_t, node.common);

    return owner;
}
// Drop one reference to a file-modify node by releasing its embedded hash table node.
static void put_file_modify_node(file_context_file_modify_node_t *file_node)
{
    file_context_ht_node_t *ht_node = &file_node->node;

    put_ht_node(ht_node);
}
/*
 Return the file-modify node for 'key' in 'table', creating and inserting a new one
 when no node exists for the file pointer or the stored key differs.
 The returned node carries a reference that the caller releases with put_file_modify_node().
 Returns NULL when a new node could not be allocated.
*/
static file_context_file_modify_node_t *lookup_or_add_file_modify_node(file_context_common_table_t *table, const file_context_key_t* key)
{
    file_context_file_modify_node_t *fresh;
    file_context_ht_node_t *ht_node;
    file_context_common_node_t *hit = lookup_common_node(table, key->file_key.ptr, table->hashbits);

    if (hit)
    {
        file_context_file_modify_node_t *existing = to_file_modify_node(hit);

        if (cmp_file_context_key(&existing->key, key) == 0)
        {
            return existing;
        }
        // If file_key mismatches, reinsert the node so 'forget' that we found a common_node
        put_file_modify_node(existing);
    }

    fresh = KMEM_NEW0(file_context_file_modify_node);
    if (!fresh)
    {
        return NULL;
    }

    ht_node = &fresh->node;
    init_ht_node(ht_node, file_modify_node_free);
    // Extra reference for the caller, taken before the node is inserted
    get_ht_node(ht_node);
    fresh->key = *key;
    init_file_context_tree(&fresh->process_lookup, FILE_CONTEXT_PROCESS_TREE_MAX_SIZE, FILE_CONTEXT_PROCESS_TREE_LRU_CLEAN_SIZE);
    insert_ht_node(table, ht_node, key->file_key.ptr);
    return fresh;
}
// Drop one reference to a process node by releasing its embedded tree node.
// (A 'return <expression>;' is not permitted in a function returning void,
// so put_tree_node() is called as a plain statement.)
static inline void put_process_node(file_context_process_node_t *process_node)
{
    put_tree_node(&process_node->node);
}
/*
 Make sure 'table' holds a file-modify node for info->key and, inside it, a process
 node for info->pid_key. The references obtained while doing so are released before
 returning. Returns true when both nodes exist afterwards, false on allocation failure.
*/
static bool add_file_modify_cache_(file_context_big_table_t *table, const file_context_add_cache_request_t *info)
{
    file_context_process_node_t *proc;
    file_context_file_modify_node_t *file;
    bool ok;

    file = lookup_or_add_file_modify_node(&table->common_table, &info->key);
    if (!file)
    {
        return false;
    }

    proc = lookup_or_add_process_node(&file->process_lookup, info->pid_key);
    ok = (proc != NULL);
    if (ok)
    {
        put_process_node(proc);
    }
    put_file_modify_node(file);
    return ok;
}
bool add_file_modify_cache(const file_context_add_cache_request_t *info)
{
bool ret = false;
file_context_big_table_t *table = get_file_context_close_modified_table();
if (!table)
{
return false;
}
ret = add_file_modify_cache_(table, info);
if (ret)
{
DPRINTF("add_file_modify_cache: %llu", info->key.file_key.ptr);
}
else
{
EPRINTF("add_file_modify_cache failed: %llu", info->key.file_key.ptr);
}
put_file_context_big_table(table);
return ret;
}
/*
 This function should be called in close, it will remove the process/file from tables.
 Erases the process entry info->pid_key from the file-modify node matching
 info->msg_info.key; when that was the node's last process entry the file node itself
 is removed from the table.
 Returns true when a process entry was erased (i.e. the file was recorded as modified
 by this process), false otherwise.
*/
bool check_update_file_modify_cache(file_context_info_t *info)
{
    bool modified = false;
    file_context_common_node_t *hit;
    file_context_big_table_t *modify_table = get_file_context_close_modified_table();

    if (!modify_table)
    {
        return false;
    }

    hit = lookup_common_node(&modify_table->common_table, info->msg_info.key.file_key.ptr, modify_table->common_table.hashbits);
    if (hit)
    {
        file_context_file_modify_node_t *file = to_file_modify_node(hit);

        if (cmp_file_context_key(&file->key, &info->msg_info.key) == 0)
        {
            erase_tree_result_t erased = erase_tree_node(&file->process_lookup, info->pid_key);

            if (erased.erased)
            {
                modified = true;
                // TODO: This is a race with someone who wants to insert new process node - add a mutex around MODIFY calls.
                if (erased.size == 0)
                {
                    remove_common_node(&modify_table->common_table, hit);
                }
            }
        }
        put_file_modify_node(file);
    }

    put_file_context_big_table(modify_table);
    return modified;
}
int acquire_file_modify_entry(void)
{
file_context_big_table_t* table = init_big_table(FILE_CONTEXT_BIG_TABLE_EXPIRE_TIME_MS);
if (!table)
{
EPRINTF("acquire_file_modify_entry: init_file_context_entry failed");
return -ENOMEM;
}
spin_lock(&global_fs_event_cache_manager.writer_lock);
if (global_fs_event_cache_manager.close_modified_table)
{
WPRINTF("acquire_file_context_entry already exists");
}
else
{
rcu_assign_pointer(global_fs_event_cache_manager.close_modified_table, table);
table = NULL;
}
spin_unlock(&global_fs_event_cache_manager.writer_lock);
if (table)
{
put_file_context_big_table(table);
}
return 0;
}
void release_file_modify_entry(void)
{
file_context_big_table_t *table = NULL;
spin_lock(&global_fs_event_cache_manager.writer_lock);
if (global_fs_event_cache_manager.close_modified_table)
{
table = global_fs_event_cache_manager.close_modified_table;
rcu_assign_pointer(global_fs_event_cache_manager.close_modified_table , NULL);
}
spin_unlock(&global_fs_event_cache_manager.writer_lock);
if (table)
{
put_file_context_big_table(table);
}
}
/*
 Map a table type to the expiration time (in milliseconds) used when the table is created:
 the open table uses FILE_CONTEXT_BIG_TABLE_EXPIRE_TIME_MS, the read and write tables
 use FILE_CONTEXT_RW_TABLE_EXPIRE_TIME_MS. Any other value falls back to
 FILE_CONTEXT_BIG_TABLE_EXPIRE_TIME_MS.
*/
static unsigned long to_expire_time_ms(file_context_table_type_t type)
{
    switch (type)
    {
    case FILE_CONTEXT_OPEN_TABLE:
        return FILE_CONTEXT_BIG_TABLE_EXPIRE_TIME_MS;
    case FILE_CONTEXT_READ_TABLE:
    case FILE_CONTEXT_WRITE_TABLE:
        return FILE_CONTEXT_RW_TABLE_EXPIRE_TIME_MS;
    }
    // Not one of the table types handled above
    return FILE_CONTEXT_BIG_TABLE_EXPIRE_TIME_MS;
}
/*
 Create a table of the given 'type' and attach it to the table set of 'transport_id'.
 The table is attached only when the transport's table set exists, belongs to exactly
 this transport id and does not have a table of that type yet.
 Returns 0 when the table was attached,
         -ENOMEM when the table could not be created,
         -EFAULT otherwise (index out of range, no matching table set, unknown type or
                 table already present); the created table is released in that case.
*/
int acquire_file_context_table(transport_id_t transport_id, file_context_table_type_t type)
{
    int ret = -EFAULT;
    int idx = transport_id_index(transport_id);
    file_context_tables_t *tables;
    file_context_big_table_t *table;

    // Never index tables_for_transport[] with a slot outside of the array
    if (idx < 0 || idx >= MAX_TRANSPORT_SIZE)
    {
        return ret;
    }

    // Created before the spinlock is taken
    table = init_big_table(to_expire_time_ms(type));
    if (!table)
    {
        return -ENOMEM;
    }

    spin_lock(&global_fs_event_cache_manager.writer_lock);
    tables = global_fs_event_cache_manager.tables_for_transport[idx];
    if (tables && tables->transport_id == transport_id)
    {
        file_context_big_table_t **ptable = NULL;

        switch (type)
        {
        case FILE_CONTEXT_OPEN_TABLE:
            ptable = &tables->open_table;
            break;
        case FILE_CONTEXT_READ_TABLE:
            ptable = &tables->read_table;
            break;
        case FILE_CONTEXT_WRITE_TABLE:
            ptable = &tables->write_table;
            break;
        }
        // Check if table pointer is sane + table does not exist yet
        if (ptable && !(*ptable))
        {
            rcu_assign_pointer(*ptable, table);
            table = NULL;
            ret = 0;
        }
    }
    spin_unlock(&global_fs_event_cache_manager.writer_lock);

    // failure condition when table was not assigned
    if (table)
    {
        put_file_context_big_table(table);
    }
    return ret;
}