diff --git a/modules/exasock/exasock-dst.c b/modules/exasock/exasock-dst.c index 3e606b6..1bd059d 100644 --- a/modules/exasock/exasock-dst.c +++ b/modules/exasock/exasock-dst.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -89,34 +90,41 @@ struct exasock_dst_queue_entry struct list_head list; }; -static struct exasock_dst_entry **dst_table; -static size_t dst_table_size; -static struct list_head dst_entries; /* Sorted by last used time */ -static struct list_head * dst_neigh_hash; -static DEFINE_SPINLOCK( dst_lock); -static struct timer_list dst_expiry_timer; -static bool dst_expiry_timer_running; - -/* Shared memory for user to notify kernel of exa_dst_entry usage */ -static uint8_t * dst_used_flags; +struct exasock_dst_net +{ + struct net *net; + struct exasock_dst_entry **table; + size_t table_size; + struct list_head entries; /* Sorted by last used time */ + struct list_head * neigh_hash; + spinlock_t lock; + struct timer_list expiry_timer; + bool expiry_timer_running; + + /* Shared memory for user to notify kernel of exa_dst_entry usage */ + uint8_t * used_flags; + + /* User-visible copy of the destination table */ + struct exa_dst_entry *user_table; +}; -/* User-visible copy of the destination table */ -static struct exa_dst_entry * dst_user_table; +int exasock_dst_net_id __read_mostly; static void __update_user_dst_entry( + struct exasock_dst_net *en, #ifndef __HAS_RT_TABLE_ID bool default_rt, #endif unsigned int idx) { - if (dst_table[idx]) + if (en->table[idx]) { - struct neighbour *neigh = dst_table[idx]->neigh; + struct neighbour *neigh = en->table[idx]->neigh; #if defined(__HAS_OLD_NETCORE) || defined(__HAS_RT_TABLE_ID) - struct rtable *rt = dst_table[idx]->rt; + struct rtable *rt = en->table[idx]->rt; #endif #ifndef __HAS_OLD_NETCORE - struct flowi4 fl4 = dst_table[idx]->fl4; + struct flowi4 fl4 = en->table[idx]->fl4; uint32_t dst_addr = fl4.daddr; uint32_t src_addr = fl4.saddr; #else @@ -124,21 +132,21 @@ static void 
__update_user_dst_entry( uint32_t src_addr = rt->rt_src; #endif - if (dst_user_table[idx].dst_addr == dst_addr && - dst_user_table[idx].src_addr == src_addr && - memcmp(dst_user_table[idx].eth_addr, neigh->ha, ETH_ALEN) == 0) + if (en->user_table[idx].dst_addr == dst_addr && + en->user_table[idx].src_addr == src_addr && + memcmp(en->user_table[idx].eth_addr, neigh->ha, ETH_ALEN) == 0) { /* Avoid invalidating caches if no change */ return; } /* Tell user processes to skip over this entry */ - dst_user_table[idx].state = EXA_DST_ENTRY_INVALID; + en->user_table[idx].state = EXA_DST_ENTRY_INVALID; - dst_user_table[idx].dst_addr = dst_addr; - dst_user_table[idx].src_addr = src_addr; - memcpy(dst_user_table[idx].eth_addr, neigh->ha, ETH_ALEN); - dst_user_table[idx].def_rt = + en->user_table[idx].dst_addr = dst_addr; + en->user_table[idx].src_addr = src_addr; + memcpy(en->user_table[idx].eth_addr, neigh->ha, ETH_ALEN); + en->user_table[idx].def_rt = #ifdef __HAS_RT_TABLE_ID (rt->rt_table_id == RT_TABLE_MAIN || rt->rt_table_id == RT_TABLE_DEFAULT) ? 1 : 0; @@ -146,23 +154,23 @@ static void __update_user_dst_entry( default_rt ? 
1 : 0; #endif if (neigh->nud_state & NUD_VALID) - dst_user_table[idx].state = EXA_DST_ENTRY_VALID; + en->user_table[idx].state = EXA_DST_ENTRY_VALID; else - dst_user_table[idx].state = EXA_DST_ENTRY_INCOMPLETE; + en->user_table[idx].state = EXA_DST_ENTRY_INCOMPLETE; /* This will cause anyone who has cached this entry to refresh */ - dst_user_table[idx].gen_id++; + en->user_table[idx].gen_id++; } else { - if (dst_user_table[idx].state == EXA_DST_ENTRY_EMPTY) + if (en->user_table[idx].state == EXA_DST_ENTRY_EMPTY) return; - dst_user_table[idx].state = EXA_DST_ENTRY_EMPTY; - dst_user_table[idx].dst_addr = 0; - dst_user_table[idx].src_addr = 0; - memset(dst_user_table[idx].eth_addr, 0, ETH_ALEN); - dst_user_table[idx].gen_id++; + en->user_table[idx].state = EXA_DST_ENTRY_EMPTY; + en->user_table[idx].dst_addr = 0; + en->user_table[idx].src_addr = 0; + memset(en->user_table[idx].eth_addr, 0, ETH_ALEN); + en->user_table[idx].gen_id++; } } @@ -189,88 +197,163 @@ static void __free_dst_entry(struct exasock_dst_entry *de) } /* Find entry, returns next empty entry if not found, lock must be held */ -static unsigned int __find_dst_entry(uint32_t daddr, uint32_t saddr) +static unsigned int __find_dst_entry(struct exasock_dst_net *en, uint32_t daddr, uint32_t saddr) { unsigned int hash, idx; - hash = idx = exa_dst_hash(daddr) & (dst_table_size - 1); + hash = idx = exa_dst_hash(daddr) & (en->table_size - 1); while (true) { - if (dst_table[idx] == NULL || + if (en->table[idx] == NULL || #ifndef __HAS_OLD_NETCORE - (dst_table[idx]->fl4.daddr == daddr && - dst_table[idx]->fl4.saddr == saddr)) + (en->table[idx]->fl4.daddr == daddr && + en->table[idx]->fl4.saddr == saddr)) #else - (dst_table[idx]->rt->rt_dst == daddr && - dst_table[idx]->rt->rt_src == saddr)) + (en->table[idx]->rt->rt_dst == daddr && + en->table[idx]->rt->rt_src == saddr)) #endif return idx; - idx = (idx + 1) & (dst_table_size - 1); + idx = (idx + 1) & (en->table_size - 1); if (idx == hash) return ~0; } } /* Update 
timer to fire at the expiry of the next entry, lock must be held */ -static void __update_dst_expiry_timer(void) +static void __update_dst_expiry_timer(struct exasock_dst_net *en) { struct exasock_dst_entry *de; - if (dst_expiry_timer_running && !list_empty(&dst_entries)) + if (en->expiry_timer_running && !list_empty(&en->entries)) { - de = list_first_entry(&dst_entries, struct exasock_dst_entry, list); - mod_timer(&dst_expiry_timer, de->used + DST_EXPIRY_TIME); + de = list_first_entry(&en->entries, struct exasock_dst_entry, list); + mod_timer(&en->expiry_timer, de->used + DST_EXPIRY_TIME); } } /* Remove an entry from the hash table, lock must be held */ -static void __remove_dst_entry(unsigned int idx) +static void __remove_dst_entry(struct exasock_dst_net *en, unsigned int idx) { unsigned int empty_idx, hash_idx; uint32_t daddr; /* Remove the hash table entry */ - dst_table[idx] = NULL; + en->table[idx] = NULL; empty_idx = idx; /* Shuffle entries up if necessary */ while (true) { - idx = (idx + 1) & (dst_table_size - 1); + idx = (idx + 1) & (en->table_size - 1); - if (!dst_table[idx]) + if (!en->table[idx]) break; #ifndef __HAS_OLD_NETCORE - daddr = dst_table[idx]->fl4.daddr; + daddr = en->table[idx]->fl4.daddr; #else - daddr = dst_table[idx]->rt->rt_dst; + daddr = en->table[idx]->rt->rt_dst; #endif - hash_idx = exa_dst_hash(daddr) & (dst_table_size - 1); + hash_idx = exa_dst_hash(daddr) & (en->table_size - 1); - if (((idx - hash_idx) & (dst_table_size - 1)) >= - ((idx - empty_idx) & (dst_table_size - 1))) + if (((idx - hash_idx) & (en->table_size - 1)) >= + ((idx - empty_idx) & (en->table_size - 1))) { - dst_table[empty_idx] = dst_table[idx]; - dst_table[empty_idx]->idx = empty_idx; - dst_table[idx] = NULL; - __update_user_dst_entry( + en->table[empty_idx] = en->table[idx]; + en->table[empty_idx]->idx = empty_idx; + en->table[idx] = NULL; + __update_user_dst_entry(en, #ifndef __HAS_RT_TABLE_ID - dst_table[empty_idx]->default_rt, + 
en->table[empty_idx]->default_rt, #endif empty_idx); empty_idx = idx; } } - __update_user_dst_entry( + __update_user_dst_entry(en, #ifndef __HAS_RT_TABLE_ID false, #endif empty_idx); } +/* Update a table entry after a neighbour reply */ +static void __neigh_update(struct exasock_dst_net *en, struct neighbour *neigh) +{ + struct exasock_dst_entry *de; + struct exasock_dst_queue_entry *qe, *tmp; + unsigned int hash; + LIST_HEAD(temp_head); + + if (!(neigh->nud_state & NUD_VALID)) + return; + + spin_lock_bh(&en->lock); + + hash = hash_ptr(neigh, NEIGH_HASH_BITS); + list_for_each_entry(de, &en->neigh_hash[hash], neigh_hash) + { + if (de->neigh == neigh) + { + __update_user_dst_entry(en, +#ifndef __HAS_RT_TABLE_ID + de->default_rt, +#endif + de->idx); + + /* Move the packets on the queue to our temporary list */ + list_splice_tail_init(&de->dst_queue, &temp_head); + } + } + + /* Send packets in our temporary list */ + list_for_each_entry_safe(qe, tmp, &temp_head, list) + { + struct sk_buff *skb = qe->skb; + struct net_device *skbdev = skb->dev; + struct net_device *realdev = skbdev; + + /* Fill out ethernet header in packet */ +#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) + if (skbdev->priv_flags & IFF_802_1Q_VLAN) + { + struct vlan_ethhdr *hdr; + + hdr = (struct vlan_ethhdr *)skb_push(skb, VLAN_ETH_HLEN); + memcpy(hdr->h_dest, neigh->ha, ETH_ALEN); + memcpy(hdr->h_source, skbdev->dev_addr, ETH_ALEN); + hdr->h_vlan_proto = htons(ETH_P_8021Q); + hdr->h_vlan_TCI = htons(vlan_dev_vlan_id(skbdev)); + hdr->h_vlan_encapsulated_proto = htons(ETH_P_IP); + realdev = vlan_dev_real_dev(skbdev); + } + else +#endif + { + struct ethhdr *hdr; + + hdr = (struct ethhdr *)skb_push(skb, ETH_HLEN); + memcpy(hdr->h_dest, neigh->ha, ETH_ALEN); + memcpy(hdr->h_source, skbdev->dev_addr, ETH_ALEN); + hdr->h_proto = htons(ETH_P_IP); + } + + /* Send packet */ + exanic_transmit_frame(realdev, skb); + + dev_put(skbdev); + list_del(&qe->list); + kfree(qe); + } + + 
spin_unlock_bh(&en->lock); + + BUG_ON(!list_empty(&temp_head)); +} + + /* Check first entry in the list, remove if expired and adjust the table */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0) static void dst_expiry_timer_handler(struct timer_list *t) @@ -278,35 +361,41 @@ static void dst_expiry_timer_handler(struct timer_list *t) static void dst_expiry_timer_handler(unsigned long data) #endif { + struct exasock_dst_net *en = +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0) + from_timer(en, t, expiry_timer); /* first arg must be the pointer variable: the macro uses typeof(*var) */ +#else + (struct exasock_dst_net *)data; +#endif struct neighbour *new_neigh; struct exasock_dst_entry *de; - spin_lock_bh(&dst_lock); + spin_lock_bh(&en->lock); - if (list_empty(&dst_entries)) + if (list_empty(&en->entries)) { - __update_dst_expiry_timer(); - spin_unlock_bh(&dst_lock); + __update_dst_expiry_timer(en); + spin_unlock_bh(&en->lock); return; } /* Get first entry in dst_entries list */ - de = list_first_entry(&dst_entries, struct exasock_dst_entry, list); + de = list_first_entry(&en->entries, struct exasock_dst_entry, list); - BUG_ON(dst_table[de->idx] != de); + BUG_ON(en->table[de->idx] != de); if (time_after(de->used + DST_EXPIRY_TIME, jiffies)) { /* Entry is not expiring yet - timer fired unexpectedly? 
*/ - __update_dst_expiry_timer(); - spin_unlock_bh(&dst_lock); + __update_dst_expiry_timer(en); + spin_unlock_bh(&en->lock); return; } /* Remove the entry if not used since last check */ - if (!dst_used_flags[de->idx]) + if (!en->used_flags[de->idx]) goto remove_entry; - dst_used_flags[de->idx] = 0; + en->used_flags[de->idx] = 0; /* Check route to see if it is stale */ #if __HAS_RT_GENID_GETTER_IPV4 @@ -336,10 +425,10 @@ static void dst_expiry_timer_handler(unsigned long data) /* Get new route from routing table */ #ifndef __HAS_OLD_NETCORE - de->rt = __ip_route_output_key(&init_net, &fl4); + de->rt = __ip_route_output_key(en->net, &fl4); if (IS_ERR(de->rt)) #else - if (__ip_route_output_key(&init_net, &de->rt, &fl) != 0) + if (__ip_route_output_key(en->net, &de->rt, &fl) != 0) #endif { de->rt = NULL; @@ -350,8 +439,8 @@ static void dst_expiry_timer_handler(unsigned long data) de->neigh = new_neigh; hash = hash_ptr(new_neigh, NEIGH_HASH_BITS); list_del(&de->neigh_hash); - list_add_tail(&de->neigh_hash, &dst_neigh_hash[hash]); - __update_user_dst_entry( + list_add_tail(&de->neigh_hash, &en->neigh_hash[hash]); + __update_user_dst_entry(en, #ifndef __HAS_RT_TABLE_ID de->default_rt, #endif @@ -361,10 +450,10 @@ static void dst_expiry_timer_handler(unsigned long data) /* Update last used time of entry */ de->used = jiffies; list_del(&de->list); - list_add_tail(&de->list, &dst_entries); - __update_dst_expiry_timer(); + list_add_tail(&de->list, &en->entries); + __update_dst_expiry_timer(en); - spin_unlock_bh(&dst_lock); + spin_unlock_bh(&en->lock); /* Update Linux neighbour table usage */ neigh_event_send(de->neigh, NULL); @@ -373,30 +462,32 @@ static void dst_expiry_timer_handler(unsigned long data) remove_entry: list_del(&de->list); list_del(&de->neigh_hash); - __remove_dst_entry(de->idx); - __update_dst_expiry_timer(); + __remove_dst_entry(en, de->idx); + __update_dst_expiry_timer(en); - spin_unlock_bh(&dst_lock); + spin_unlock_bh(&en->lock); __free_dst_entry(de); } /* 
Remove any packets pending in destination table queue related to a given * connection */ -void exasock_dst_remove_socket(uint32_t local_addr, uint32_t peer_addr, +void exasock_dst_remove_socket(struct net *net, + uint32_t local_addr, uint32_t peer_addr, uint16_t local_port, uint16_t peer_port) { + struct exasock_dst_net *en = net_generic(net, exasock_dst_net_id); struct exasock_dst_queue_entry *qe, *tmp; struct exasock_dst_entry *de; unsigned idx; - spin_lock_bh(&dst_lock); + spin_lock_bh(&en->lock); - idx = __find_dst_entry(peer_addr, local_addr); - if ((idx == ~0) || dst_table[idx] == NULL) + idx = __find_dst_entry(en, peer_addr, local_addr); + if ((idx == ~0) || en->table[idx] == NULL) goto exit; - de = dst_table[idx]; + de = en->table[idx]; list_for_each_entry_safe(qe, tmp, &de->dst_queue, list) { @@ -418,91 +509,26 @@ void exasock_dst_remove_socket(uint32_t local_addr, uint32_t peer_addr, } } exit: - spin_unlock_bh(&dst_lock); + spin_unlock_bh(&en->lock); } /** * Update a table entry after a neighbour reply. 
*/ -void exasock_dst_neigh_update(struct neighbour *neigh) +void exasock_dst_neigh_update(struct net *net, struct neighbour *neigh) { - struct exasock_dst_entry *de; - struct exasock_dst_queue_entry *qe, *tmp; - unsigned int hash; - LIST_HEAD(temp_head); - - if (!(neigh->nud_state & NUD_VALID)) - return; - - spin_lock_bh(&dst_lock); - - hash = hash_ptr(neigh, NEIGH_HASH_BITS); - list_for_each_entry(de, &dst_neigh_hash[hash], neigh_hash) - { - if (de->neigh == neigh) - { - __update_user_dst_entry( -#ifndef __HAS_RT_TABLE_ID - de->default_rt, -#endif - de->idx); - - /* Move the packets on the queue to our temporary list */ - list_splice_tail_init(&de->dst_queue, &temp_head); - } - } - - /* Send packets in our temporary list */ - list_for_each_entry_safe(qe, tmp, &temp_head, list) - { - struct sk_buff *skb = qe->skb; - struct net_device *skbdev = skb->dev; - struct net_device *realdev = skbdev; - - /* Fill out ethernet header in packet */ -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) - if (skbdev->priv_flags & IFF_802_1Q_VLAN) - { - struct vlan_ethhdr *hdr; - - hdr = (struct vlan_ethhdr *)skb_push(skb, VLAN_ETH_HLEN); - memcpy(hdr->h_dest, neigh->ha, ETH_ALEN); - memcpy(hdr->h_source, skbdev->dev_addr, ETH_ALEN); - hdr->h_vlan_proto = htons(ETH_P_8021Q); - hdr->h_vlan_TCI = htons(vlan_dev_vlan_id(skbdev)); - hdr->h_vlan_encapsulated_proto = htons(ETH_P_IP); - realdev = vlan_dev_real_dev(skbdev); - } - else -#endif - { - struct ethhdr *hdr; - - hdr = (struct ethhdr *)skb_push(skb, ETH_HLEN); - memcpy(hdr->h_dest, neigh->ha, ETH_ALEN); - memcpy(hdr->h_source, skbdev->dev_addr, ETH_ALEN); - hdr->h_proto = htons(ETH_P_IP); - } - - /* Send packet */ - exanic_transmit_frame(realdev, skb); - - dev_put(skbdev); - list_del(&qe->list); - kfree(qe); - } - - spin_unlock_bh(&dst_lock); - - BUG_ON(!list_empty(&temp_head)); + struct exasock_dst_net *en = net_generic(net, exasock_dst_net_id); + __neigh_update(en, neigh); } /** * Look up or create destination 
entry and insert skb into queue. */ -int exasock_dst_insert(uint32_t dst_addr, uint32_t *src_addr, +int exasock_dst_insert(struct net *net, + uint32_t dst_addr, uint32_t *src_addr, struct sk_buff *skb) { + struct exasock_dst_net *en = net_generic(net, exasock_dst_net_id); struct exasock_dst_entry *de; struct exasock_dst_queue_entry *qe; struct net_device *ndev, *realdev; @@ -523,11 +549,11 @@ int exasock_dst_insert(uint32_t dst_addr, uint32_t *src_addr, /* Determine output interface */ #ifndef __HAS_OLD_NETCORE - rt = __ip_route_output_key(&init_net, &fl4); + rt = __ip_route_output_key(en->net, &fl4); if (IS_ERR(rt)) #else rt = NULL; - err = __ip_route_output_key(&init_net, &rt, &fl); + err = __ip_route_output_key(en->net, &rt, &fl); if (err) #endif { @@ -542,7 +568,7 @@ int exasock_dst_insert(uint32_t dst_addr, uint32_t *src_addr, saddr = rt->rt_src; oif = rt->rt_iif; #endif - ndev = dev_get_by_index(&init_net, oif); + ndev = dev_get_by_index(en->net, oif); if (ndev == NULL) { err = -ENETUNREACH; @@ -588,20 +614,20 @@ int exasock_dst_insert(uint32_t dst_addr, uint32_t *src_addr, ndev = NULL; } - spin_lock_bh(&dst_lock); + spin_lock_bh(&en->lock); - idx = __find_dst_entry(dst_addr, saddr); + idx = __find_dst_entry(en, dst_addr, saddr); if (idx == ~0) { err = -ENOMEM; goto err_find_dst_entry; } - else if (dst_table[idx]) + else if (en->table[idx]) { /* Existing entry */ kfree(de); - de = dst_table[idx]; + de = en->table[idx]; list_del(&de->list); } else @@ -619,43 +645,43 @@ int exasock_dst_insert(uint32_t dst_addr, uint32_t *src_addr, de->idx = idx; INIT_LIST_HEAD(&de->dst_queue); hash = hash_ptr(de->neigh, NEIGH_HASH_BITS); - list_add_tail(&de->neigh_hash, &dst_neigh_hash[hash]); - dst_table[idx] = de; - __update_user_dst_entry( + list_add_tail(&de->neigh_hash, &en->neigh_hash[hash]); + en->table[idx] = de; + __update_user_dst_entry(en, #ifndef __HAS_RT_TABLE_ID de->default_rt, #endif idx); } - dst_used_flags[idx] = 0; + en->used_flags[idx] = 0; de->used = 
jiffies; #ifndef __HAS_RT_TABLE_ID if (*src_addr == htonl(INADDR_ANY)) de->default_rt = true; #endif - list_add_tail(&de->list, &dst_entries); + list_add_tail(&de->list, &en->entries); if (qe) list_add_tail(&qe->list, &de->dst_queue); - __update_dst_expiry_timer(); + __update_dst_expiry_timer(en); - spin_unlock_bh(&dst_lock); + spin_unlock_bh(&en->lock); /* Initiate lookup using Linux neighbour cache */ neigh_event_send(de->neigh, NULL); /* Packet could have been queued even though neigh is valid */ if (de->neigh->nud_state & NUD_VALID) - exasock_dst_neigh_update(de->neigh); + __neigh_update(en, de->neigh); *src_addr = saddr; return 0; err_find_dst_entry: - spin_unlock_bh(&dst_lock); + spin_unlock_bh(&en->lock); kfree(qe); err_queue_alloc: kfree(de); @@ -673,19 +699,20 @@ int exasock_dst_insert(uint32_t dst_addr, uint32_t *src_addr, /** * Remove all table entries which contain source address src_addr */ -void exasock_dst_invalidate_src(uint32_t src_addr) +void exasock_dst_invalidate_src(struct net *net, uint32_t src_addr) { + struct exasock_dst_net *en = net_generic(net, exasock_dst_net_id); struct exasock_dst_entry *de; unsigned int idx; - for (idx = 0; idx < dst_table_size; idx++) + for (idx = 0; idx < en->table_size; idx++) { /* Read the user table to avoid having to take a lock * unless we find a match */ - if (dst_user_table[idx].src_addr == src_addr) + if (en->user_table[idx].src_addr == src_addr) { - spin_lock_bh(&dst_lock); - de = dst_table[idx]; + spin_lock_bh(&en->lock); + de = en->table[idx]; if (de != NULL && #ifndef __HAS_OLD_NETCORE de->fl4.saddr == src_addr) @@ -696,106 +723,131 @@ void exasock_dst_invalidate_src(uint32_t src_addr) /* Found a match, remove the table entry */ list_del(&de->list); list_del(&de->neigh_hash); - __remove_dst_entry(idx); - spin_unlock_bh(&dst_lock); + __remove_dst_entry(en, idx); + spin_unlock_bh(&en->lock); __free_dst_entry(de); } else - spin_unlock_bh(&dst_lock); + spin_unlock_bh(&en->lock); } } } -int 
exasock_dst_used_flags_mmap(struct vm_area_struct *vma) +int exasock_dst_used_flags_mmap(struct net *net, struct vm_area_struct *vma) { - return remap_vmalloc_range(vma, dst_used_flags, + struct exasock_dst_net *en = net_generic(net, exasock_dst_net_id); + return remap_vmalloc_range(vma, en->used_flags, vma->vm_pgoff - (EXASOCK_OFFSET_DST_USED_FLAGS / PAGE_SIZE)); } -int exasock_dst_table_mmap(struct vm_area_struct *vma) +int exasock_dst_table_mmap(struct net *net, struct vm_area_struct *vma) { + struct exasock_dst_net *en = net_generic(net, exasock_dst_net_id); + if (vma->vm_flags & VM_WRITE) return -EACCES; - return remap_vmalloc_range(vma, dst_user_table, + return remap_vmalloc_range(vma, en->user_table, vma->vm_pgoff - (EXASOCK_OFFSET_DST_TABLE / PAGE_SIZE)); } -unsigned int exasock_dst_table_size(void) +unsigned int exasock_dst_table_size(struct net *net) { - return dst_table_size; + struct exasock_dst_net *en = net_generic(net, exasock_dst_net_id); + return en->table_size; } -/** - * This function is called from exasock_init() when the driver is loaded. 
- */ -int __init exasock_dst_init(void) +static int __net_init exasock_dst_net_init(struct net *net) { + struct exasock_dst_net *en = net_generic(net, exasock_dst_net_id); unsigned i; int err; - dst_table_size = DEFAULT_DST_TABLE_SIZE; - dst_table = kcalloc(dst_table_size, sizeof(struct exasock_dst_entry *), + en->table_size = DEFAULT_DST_TABLE_SIZE; + en->table = kcalloc(en->table_size, sizeof(struct exasock_dst_entry *), GFP_KERNEL); - dst_neigh_hash = kmalloc(NEIGH_HASH_SIZE * sizeof(struct list_head), + en->neigh_hash = kmalloc(NEIGH_HASH_SIZE * sizeof(struct list_head), GFP_KERNEL); - dst_used_flags = vmalloc_user(dst_table_size * sizeof(uint8_t)); - dst_user_table = vmalloc_user(dst_table_size * + en->used_flags = vmalloc_user(en->table_size * sizeof(uint8_t)); + en->user_table = vmalloc_user(en->table_size * sizeof(struct exa_dst_entry)); - if (dst_table == NULL || dst_neigh_hash == NULL || - dst_used_flags == NULL || dst_user_table == NULL) + if (en->table == NULL || en->neigh_hash == NULL || + en->used_flags == NULL || en->user_table == NULL) { err = -ENOMEM; goto err_alloc; } - INIT_LIST_HEAD(&dst_entries); + INIT_LIST_HEAD(&en->entries); for (i = 0; i < NEIGH_HASH_SIZE; i++) - INIT_LIST_HEAD(&dst_neigh_hash[i]); + INIT_LIST_HEAD(&en->neigh_hash[i]); - dst_expiry_timer_running = true; + spin_lock_init(&en->lock); + en->net = net; + en->expiry_timer_running = true; #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0) - timer_setup(&dst_expiry_timer, dst_expiry_timer_handler, 0); + timer_setup(&en->expiry_timer, dst_expiry_timer_handler, 0); #else - setup_timer(&dst_expiry_timer, dst_expiry_timer_handler, 0); + setup_timer(&en->expiry_timer, dst_expiry_timer_handler, (unsigned long)en); #endif return 0; err_alloc: - vfree(dst_user_table); - vfree(dst_used_flags); - kfree(dst_neigh_hash); - kfree(dst_table); + vfree(en->user_table); + vfree(en->used_flags); + kfree(en->neigh_hash); + kfree(en->table); return err; } -/** - * This function is called from 
exasock_exit() when the driver is unloaded - * and by exasock_init() on error. - */ -void exasock_dst_exit(void) +static void __net_exit exasock_dst_net_exit(struct net *net) { + struct exasock_dst_net *en = net_generic(net, exasock_dst_net_id); struct exasock_dst_entry *de, *tmp; - dst_expiry_timer_running = false; - del_timer_sync(&dst_expiry_timer); + en->expiry_timer_running = false; + del_timer_sync(&en->expiry_timer); - spin_lock_bh(&dst_lock); + spin_lock_bh(&en->lock); - list_for_each_entry_safe(de, tmp, &dst_entries, list) + list_for_each_entry_safe(de, tmp, &en->entries, list) { list_del(&de->list); list_del(&de->neigh_hash); - dst_table[de->idx] = NULL; + en->table[de->idx] = NULL; __free_dst_entry(de); } - spin_unlock_bh(&dst_lock); + spin_unlock_bh(&en->lock); + + kfree(en->table); + kfree(en->neigh_hash); + vfree(en->used_flags); + vfree(en->user_table); +} + +static struct pernet_operations exasock_dst_net_ops = { + .init = exasock_dst_net_init, + .exit = exasock_dst_net_exit, + .id = &exasock_dst_net_id, + .size = sizeof(struct exasock_dst_net), +}; + +/** + * This function is called from exasock_init() when the driver is loaded. + */ +int __init exasock_dst_init(void) +{ + return register_pernet_subsys(&exasock_dst_net_ops); +} - kfree(dst_table); - kfree(dst_neigh_hash); - vfree(dst_used_flags); - vfree(dst_user_table); +/** + * This function is called from exasock_exit() when the driver is unloaded + * and by exasock_init() on error. 
+ */ +void exasock_dst_exit(void) +{ + unregister_pernet_subsys(&exasock_dst_net_ops); } diff --git a/modules/exasock/exasock-ip.c b/modules/exasock/exasock-ip.c index aeaf3f1..411cbc4 100644 --- a/modules/exasock/exasock-ip.c +++ b/modules/exasock/exasock-ip.c @@ -18,7 +18,8 @@ #include "../exanic/exanic.h" #include "exasock.h" -int exasock_ip_send(uint8_t proto, uint32_t dst_addr, uint32_t src_addr, +int exasock_ip_send(struct net *net, + uint8_t proto, uint32_t dst_addr, uint32_t src_addr, struct sk_buff *skb) { struct iphdr *iph; @@ -38,5 +39,8 @@ int exasock_ip_send(uint8_t proto, uint32_t dst_addr, uint32_t src_addr, skb_reset_network_header(skb); - return exasock_dst_insert(dst_addr, &src_addr, skb); + if (net != NULL) + return exasock_dst_insert(net, dst_addr, &src_addr, skb); + kfree_skb(skb); /* no namespace to route in: drop the packet rather than leak the skb */ + return -ENETUNREACH; } diff --git a/modules/exasock/exasock-main.c b/modules/exasock/exasock-main.c index 0dc7724..8a93c70 100644 --- a/modules/exasock/exasock-main.c +++ b/modules/exasock/exasock-main.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -28,6 +29,11 @@ bool module_removed; static struct exasock_kernel_info *exasock_info_page; static struct file_operations exasock_fops; +static struct net *get_current_net(void) +{ + return current->nsproxy->net_ns; /* borrowed pointer: callers never put_net(), so no reference is taken */ +} + static int exasock_net_event(struct notifier_block *notifier, unsigned long event, void *ptr) { @@ -35,10 +41,12 @@ static int exasock_net_event(struct notifier_block *notifier, { case NETEVENT_NEIGH_UPDATE: { + struct net *net; struct neighbour *neigh = ptr; if (neigh->tbl != &arp_tbl) break; - exasock_dst_neigh_update(neigh); + net = dev_net(neigh->dev); + exasock_dst_neigh_update(net, neigh); return NOTIFY_OK; } @@ -61,7 +69,7 @@ static int exasock_inetaddr_event(struct notifier_block *notifier, switch (event) { case NETDEV_DOWN: - exasock_dst_invalidate_src(ifa->ifa_address); + exasock_dst_invalidate_src(dev_net(ifa->ifa_dev->dev), ifa->ifa_address); break; default: @@ -75,10 +83,12 
@@ static struct notifier_block exasock_inetaddr_notifier = { .notifier_call = exasock_inetaddr_event }; -static int exasock_dst_queue(uint32_t addr, uint32_t *src_addr, +static int exasock_dst_queue( + uint32_t addr, uint32_t *src_addr, const char __user *buf, size_t len) { struct sk_buff *skb = NULL; + struct net *net = get_current_net(); int err; if (len > 0) @@ -100,7 +110,7 @@ static int exasock_dst_queue(uint32_t addr, uint32_t *src_addr, } } - return exasock_dst_insert(addr, src_addr, skb); + return exasock_dst_insert(net, addr, src_addr, skb); err_copy_from_user: kfree_skb(skb); @@ -224,6 +234,7 @@ static int exasock_socket_mmap(struct exasock_hdr *common, static int exasock_dev_mmap(struct file *filp, struct vm_area_struct *vma) { void *priv = filp->private_data; + struct net *net = get_current_net(); if (vma->vm_pgoff >= (EXASOCK_OFFSET_EPOLL_STATE / PAGE_SIZE)) return exasock_epoll_state_mmap((struct exasock_epoll *)priv, vma); @@ -232,9 +243,9 @@ static int exasock_dev_mmap(struct file *filp, struct vm_area_struct *vma) else if (vma->vm_pgoff >= (EXASOCK_OFFSET_RX_BUFFER / PAGE_SIZE)) return exasock_socket_mmap((struct exasock_hdr *)priv, vma); else if (vma->vm_pgoff >= (EXASOCK_OFFSET_DST_USED_FLAGS / PAGE_SIZE)) - return exasock_dst_used_flags_mmap(vma); + return exasock_dst_used_flags_mmap(net, vma); else if (vma->vm_pgoff >= (EXASOCK_OFFSET_DST_TABLE / PAGE_SIZE)) - return exasock_dst_table_mmap(vma); + return exasock_dst_table_mmap(net, vma); else if (vma->vm_pgoff >= (EXASOCK_OFFSET_SOCKET_STATE / PAGE_SIZE)) return exasock_socket_mmap((struct exasock_hdr *)priv, vma); else @@ -388,7 +399,8 @@ static long exasock_dev_ioctl(struct file *filp, unsigned int cmd, if (copy_from_user(&req, (void *)arg, sizeof(req)) != 0) return -EFAULT; - err = exasock_dst_queue(req.dst_addr, &req.src_addr, req.ip_packet, + err = exasock_dst_queue( + req.dst_addr, &req.src_addr, req.ip_packet, req.ip_packet_len); if (err) return err; @@ -639,7 +651,7 @@ static int 
__init exasock_init(void) goto err_vmalloc; exasock_info_page->api_version = EXASOCK_API_VERSION; - exasock_info_page->dst_table_size = exasock_dst_table_size(); + exasock_info_page->dst_table_size = exasock_dst_table_size(&init_net); /* Create /dev/exasock device */ exasock_dev.minor = MISC_DYNAMIC_MINOR; diff --git a/modules/exasock/exasock-tcp.c b/modules/exasock/exasock-tcp.c index f35c85e..04b65d1 100644 --- a/modules/exasock/exasock-tcp.c +++ b/modules/exasock/exasock-tcp.c @@ -291,7 +291,7 @@ static void exasock_tcp_retransmit(struct exasock_tcp *tcp, bool fast_retrans); static void exasock_tcp_send_ack(struct exasock_tcp *tcp, bool dup); static void exasock_tcp_send_reset(struct exasock_tcp *tcp); uint32_t exasock_tcp_req_get_isn(struct exasock_tcp_req *req); -static void exasock_tcp_send_syn_ack(struct exasock_tcp_req *req); +static void exasock_tcp_send_syn_ack(struct exasock_tcp *tcp, struct exasock_tcp_req *req); static void exasock_tcp_send_probe(struct exasock_tcp *tcp); static struct exasock_tcp *exasock_tcp_lookup(uint32_t local_addr, uint32_t peer_addr, @@ -960,6 +960,7 @@ void exasock_tcp_close(struct exasock_tcp *tcp) * or from a user app's context */ static void exasock_tcp_free(struct exasock_tcp *tcp) { + struct net *net = sock_net(tcp->sock->sk); BUG_ON(tcp->hdr.type != EXASOCK_TYPE_SOCKET); BUG_ON(tcp->hdr.socket.domain != AF_INET); BUG_ON(tcp->hdr.socket.type != SOCK_STREAM); @@ -986,7 +987,8 @@ static void exasock_tcp_free(struct exasock_tcp *tcp) /* If there are still any packets pending in destination table queue, * it means the socket does not have a valid neighbour. These packets * need to be removed now. 
*/ - exasock_dst_remove_socket(tcp->local_addr, tcp->peer_addr, + exasock_dst_remove_socket(net, + tcp->local_addr, tcp->peer_addr, tcp->local_port, tcp->peer_port); /* Remove from epoll notify */ @@ -1953,7 +1955,7 @@ static void exasock_tcp_req_worker(struct work_struct *work) { req->timestamp = jiffies; ++req->synack_attempts; - exasock_tcp_send_syn_ack(req); + exasock_tcp_send_syn_ack(NULL, req); } } spin_unlock(&tcp_req_lock); @@ -2062,7 +2064,7 @@ static int exasock_tcp_req_process(struct sk_buff *skb, struct exasock_tcp *tcp, } /* Send SYN ACK packet */ - exasock_tcp_send_syn_ack(req); + exasock_tcp_send_syn_ack(tcp, req); /* Insert into hash table and lists */ hash = exasock_tcp_hash(req->local_addr, req->peer_addr, @@ -2501,6 +2503,7 @@ static void exasock_tcp_send_segment(struct exasock_tcp *tcp, uint32_t seq, uint32_t len, bool dup) { struct exa_socket_state *state = tcp->user_page; + struct net *net = sock_net(tcp->sock->sk); struct sk_buff *skb; struct tcphdr *th; uint8_t tcp_state; @@ -2663,7 +2666,7 @@ static void exasock_tcp_send_segment(struct exasock_tcp *tcp, uint32_t seq, th->check = csum_tcpudp_magic(tcp->peer_addr, tcp->local_addr, skb->len, IPPROTO_TCP, csum_partial(th, skb->len, 0)); - exasock_ip_send(IPPROTO_TCP, tcp->peer_addr, tcp->local_addr, skb); + exasock_ip_send(net, IPPROTO_TCP, tcp->peer_addr, tcp->local_addr, skb); return; abort_packet: @@ -2733,6 +2736,7 @@ static void exasock_tcp_send_ack(struct exasock_tcp *tcp, bool dup) static void exasock_tcp_send_reset(struct exasock_tcp *tcp) { struct exa_socket_state *state = tcp->user_page; + struct net *net = sock_net(tcp->sock->sk); struct sk_buff *skb; struct tcphdr *th; uint8_t tcp_state; @@ -2790,15 +2794,16 @@ static void exasock_tcp_send_reset(struct exasock_tcp *tcp) th->check = csum_tcpudp_magic(tcp->peer_addr, tcp->local_addr, skb->len, IPPROTO_TCP, csum_partial(th, skb->len, 0)); - exasock_ip_send(IPPROTO_TCP, tcp->peer_addr, tcp->local_addr, skb); + exasock_ip_send(net, 
IPPROTO_TCP, tcp->peer_addr, tcp->local_addr, skb); return; abort_packet: kfree_skb(skb); } -static void exasock_tcp_send_syn_ack(struct exasock_tcp_req *req) +static void exasock_tcp_send_syn_ack(struct exasock_tcp *tcp, struct exasock_tcp_req *req) { + struct net *net = tcp != NULL ? sock_net(tcp->sock->sk) : NULL; struct sk_buff *skb; struct tcphdr *th; uint8_t *opts; @@ -2836,7 +2841,7 @@ static void exasock_tcp_send_syn_ack(struct exasock_tcp_req *req) th->check = csum_tcpudp_magic(req->peer_addr, req->local_addr, skb->len, IPPROTO_TCP, csum_partial(th, skb->len, 0)); - exasock_ip_send(IPPROTO_TCP, req->peer_addr, req->local_addr, skb); + exasock_ip_send(net, IPPROTO_TCP, req->peer_addr, req->local_addr, skb); } static void exasock_tcp_send_probe(struct exasock_tcp *tcp) diff --git a/modules/exasock/exasock.h b/modules/exasock/exasock.h index cf6ab9a..99c224a 100644 --- a/modules/exasock/exasock.h +++ b/modules/exasock/exasock.h @@ -60,18 +60,21 @@ static inline void exasock_unlock(volatile uint32_t *flag) /* exasock-dst.c */ int __init exasock_dst_init(void); void exasock_dst_exit(void); -void exasock_dst_remove_socket(uint32_t local_addr, uint32_t peer_addr, +void exasock_dst_remove_socket(struct net *net, + uint32_t local_addr, uint32_t peer_addr, uint16_t local_port, uint16_t peer_port); -void exasock_dst_neigh_update(struct neighbour *neigh); -int exasock_dst_insert(uint32_t dst_addr, uint32_t *src_addr, +void exasock_dst_neigh_update(struct net *net, struct neighbour *neigh); +int exasock_dst_insert(struct net *net, + uint32_t dst_addr, uint32_t *src_addr, struct sk_buff *skb); -void exasock_dst_invalidate_src(uint32_t src_addr); -int exasock_dst_used_flags_mmap(struct vm_area_struct *vma); -int exasock_dst_table_mmap(struct vm_area_struct *vma); -unsigned int exasock_dst_table_size(void); +void exasock_dst_invalidate_src(struct net *net, uint32_t src_addr); +int exasock_dst_used_flags_mmap(struct net *net, struct vm_area_struct *vma); +int 
exasock_dst_table_mmap(struct net *net, struct vm_area_struct *vma); +unsigned int exasock_dst_table_size(struct net *net); /* exasock-ip.c */ -int exasock_ip_send(uint8_t proto, uint32_t dst_addr, uint32_t src_addr, +int exasock_ip_send(struct net *net, + uint8_t proto, uint32_t dst_addr, uint32_t src_addr, struct sk_buff *skb); /* exasock-udp.c */