close() on a Socket Shared by Multiple Processes

Time: September 8, 2013
Category: filesystem, tcp/ip internals

When a parent and child process share a socket descriptor and one of them calls close(), will a FIN be sent out, disturbing the other process's use of the socket? No: the call only decrements a reference count and does nothing else.
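
We can sanity-check this from user space first. Below is a minimal sketch (AF_UNIX socketpair() instead of a real TCP peer, error handling omitted): the peer only sees EOF once the last copy of the descriptor is closed, not when the parent's close() merely drops f_count from 2 to 1.

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/wait.h>

int main(void)
{
	int sv[2];
	char buf[16];
	ssize_t n;

	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);

	if (fork() == 0) {            /* child shares both struct files */
		close(sv[1]);
		sleep(1);
		write(sv[0], "hi", 2);    /* still valid: f_count > 0 */
		close(sv[0]);             /* last reference: now released */
		_exit(0);
	}

	close(sv[0]);                 /* parent: only drops a reference */
	n = read(sv[1], buf, sizeof(buf));
	printf("got %zd bytes\n", n); /* prints 2: the socket survived */
	n = read(sv[1], buf, sizeof(buf));
	printf("got %zd bytes\n", n); /* 0 (EOF) only after child's close */
	wait(NULL);
	return 0;
}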

Let's look at the source code, following this call chain:

close --------- fs/open.c
__close_fd ---- fs/file.c
filp_close ---- fs/open.c
fput ---------- fs/file_table.c
__fput -------- fs/file_table.c (the last op)

First of all, a socket descriptor is itself a kind of Unix file; the file-related file_operations for sockets are implemented in net/socket.c, as follows:

/*
 *	Socket files have a set of 'special' operations as well as the generic file ones.
 *      These don't appear in the operation structures but are done directly via the socketcall()
 *      multiplexor.
 */

static const struct file_operations socket_file_ops = {
	.owner =	THIS_MODULE,
	.llseek =	no_llseek,
	.aio_read =	sock_aio_read,
	.aio_write =	sock_aio_write,
	.poll =		sock_poll,
	.unlocked_ioctl = sock_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = compat_sock_ioctl,
#endif
	.mmap =		sock_mmap,
	.open =		sock_no_open,	/* special open code to disallow open via /proc */
	.release =	sock_close,
	.fasync =	sock_fasync,
	.sendpage =	sock_sendpage,
	.splice_write = generic_splice_sendpage,
	.splice_read =	sock_splice_read,
};
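
Note the sock_no_open entry: re-opening a socket through /proc is refused, and the error you get is the -ENXIO that sock_no_open returns. A tiny sketch to observe this from user space:

#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/socket.h>

int main(void)
{
	char path[64];
	int s = socket(AF_INET, SOCK_STREAM, 0);

	snprintf(path, sizeof(path), "/proc/self/fd/%d", s);
	if (open(path, O_RDWR) < 0)	/* hits sock_no_open() */
		printf("open(%s): %s\n", path, strerror(errno));
	close(s);
	return 0;
}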

The close() system call

The source lives in fs/open.c. As the code shows, the close system call is ultimately carried out by __close_fd(); the current macro refers to the descriptor structure (task_struct) of the calling process:

/*
 * Careful here! We test whether the file pointer is NULL before
 * releasing the fd. This ensures that one clone task can't release
 * an fd while another clone is opening it.
 */
SYSCALL_DEFINE1(close, unsigned int, fd)
{
	int retval = __close_fd(current->files, fd);

	/* can't restart close syscall because file table entry was cleared */
	if (unlikely(retval == -ERESTARTSYS ||
		     retval == -ERESTARTNOINTR ||
		     retval == -ERESTARTNOHAND ||
		     retval == -ERESTART_RESTARTBLOCK))
		retval = -EINTR;

	return retval;
}
EXPORT_SYMBOL(sys_close);
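
A practical consequence of the comment above: by the time close() can return EINTR, the fd slot has already been cleared, so user space must never retry the call. A hedged sketch of a wrapper (xclose is a made-up name):

#include <errno.h>
#include <unistd.h>

/* Never retry close() on EINTR: the fd slot was already released
 * in __close_fd(), so a retry could close an unrelated descriptor
 * that another thread has just been handed. */
static int xclose(int fd)
{
	int ret = close(fd);

	if (ret == -1 && errno == EINTR)
		return 0;	/* the descriptor is gone either way */
	return ret;
}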

The __close_fd() function

__close_fd() first looks up the file structure for fd in the descriptor table fdt (each process has one; it records every file the process currently has open). __clear_close_on_exec() clears the descriptor's close-on-exec flag in the fdt, __put_unused_fd() returns the descriptor number to the pool for reuse, and the rest of the work is handed to filp_close().

/*
 * The same warnings as for __alloc_fd()/__fd_install() apply here...
 */
int __close_fd(struct files_struct *files, unsigned fd)
{
	struct file *file;
	struct fdtable *fdt;

	spin_lock(&files->file_lock);
	fdt = files_fdtable(files);
	if (fd >= fdt->max_fds)
		goto out_unlock;
	file = fdt->fd[fd];
	if (!file)
		goto out_unlock;
	rcu_assign_pointer(fdt->fd[fd], NULL);
	__clear_close_on_exec(fd, fdt);
	__put_unused_fd(files, fd);
	spin_unlock(&files->file_lock);
	return filp_close(file, files);

out_unlock:
	spin_unlock(&files->file_lock);
	return -EBADF;
}
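
Because __put_unused_fd() releases the descriptor number immediately, and Linux always hands out the lowest free number, a freshly closed fd is recycled right away. A minimal single-threaded sketch (assumes no other descriptors are opened or closed in between):

#include <assert.h>
#include <unistd.h>
#include <sys/socket.h>

int main(void)
{
	int a = socket(AF_INET, SOCK_STREAM, 0);

	close(a);	/* __put_unused_fd() frees the slot at once */
	int b = socket(AF_INET, SOCK_STREAM, 0);
	assert(a == b);	/* lowest free slot: same number again */
	close(b);
	return 0;
}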

Note that when multiple processes share a socket, what they actually share is the socket structure, or rather the file structure, not the fd itself.
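
This split is easy to observe with dup(): file status flags such as O_NONBLOCK live in the shared struct file, while FD_CLOEXEC lives in the per-process fdt. A minimal sketch:

#include <assert.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/socket.h>

int main(void)
{
	int a = socket(AF_INET, SOCK_STREAM, 0);
	int b = dup(a);			/* two fds, one struct file */

	fcntl(b, F_SETFL, fcntl(b, F_GETFL) | O_NONBLOCK);
	assert(fcntl(a, F_GETFL) & O_NONBLOCK);	   /* shared: struct file */

	fcntl(b, F_SETFD, FD_CLOEXEC);
	assert(!(fcntl(a, F_GETFD) & FD_CLOEXEC)); /* per-fd: lives in fdt */

	close(a);
	close(b);
	return 0;
}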

The filp_close() function

filp_close() first checks whether the file's file_operations provide a flush callback; as we saw above, socket_file_ops does not define one. It then does some bookkeeping depending on the FMODE_PATH flag (not the point here, so we skip it). The important step is fput():

/*
 * "id" is the POSIX thread ID. We use the
 * files pointer for this..
 */
int filp_close(struct file *filp, fl_owner_t id)
{
	int retval = 0;

	if (!file_count(filp)) {
		printk(KERN_ERR "VFS: Close: file count is 0\n");
		return 0;
	}

	if (filp->f_op && filp->f_op->flush)
		retval = filp->f_op->flush(filp, id);

	if (likely(!(filp->f_mode & FMODE_PATH))) {
		dnotify_flush(filp, id);
		locks_remove_posix(filp, id);
	}
	fput(filp);
	return retval;
}

EXPORT_SYMBOL(filp_close);
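
The locks_remove_posix() call above hides a classic trap: POSIX record locks belong to the process+file pair, so closing any descriptor for the file, even one obtained from a separate open(), silently drops the locks. A minimal sketch (/tmp/lock.demo is an arbitrary scratch file):

#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	struct flock fl = { .l_type = F_WRLCK, .l_whence = SEEK_SET };
	int a = open("/tmp/lock.demo", O_RDWR | O_CREAT, 0600);
	int b = open("/tmp/lock.demo", O_RDWR);

	fcntl(a, F_SETLK, &fl);	/* lock the whole file via fd a */
	close(b);		/* closing fd b drops a's lock too! */
	/* ... the file is now unlocked, though fd a is still open */
	close(a);
	return 0;
}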

The fput() function

atomic_long_dec_and_test() atomically decrements the reference count file->f_count and tests whether it has reached zero. If it has, the current process is the last user of the file and must take care of the remaining release and cleanup work; otherwise fput() simply returns. So when several processes share a socket, close() on its descriptor merely drops the file structure's reference count by one and does nothing else.

void fput(struct file *file)
{
	if (atomic_long_dec_and_test(&file->f_count)) {
		struct task_struct *task = current;

		file_sb_list_del(file);
		if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
			init_task_work(&file->f_u.fu_rcuhead, ____fput);
			if (!task_work_add(task, &file->f_u.fu_rcuhead, true))
				return;
			/*
			 * After this task has run exit_task_work(),
			 * task_work_add() will fail.  free_ipc_ns()->
			 * shm_destroy() can do this.  Fall through to delayed
			 * fput to avoid leaking *file.
			 */
		}

		if (llist_add(&file->f_u.fu_llist, &delayed_fput_list))
			schedule_work(&delayed_fput_work);
	}
}
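
For reference, the dec-and-test idiom fput() relies on can be sketched in user-space C11 (object_get/object_put are made-up names): the decrement and the zero test form one atomic step, so exactly one releaser observes the count reaching zero and does the cleanup.

#include <stdatomic.h>
#include <stdlib.h>

struct object {
	atomic_long refcount;	/* plays the role of file->f_count */
	/* ... payload ... */
};

static void object_get(struct object *obj)
{
	atomic_fetch_add(&obj->refcount, 1);	/* like get_file() */
}

static void object_put(struct object *obj)
{
	/* mirrors atomic_long_dec_and_test(&file->f_count) in fput() */
	if (atomic_fetch_sub(&obj->refcount, 1) == 1)
		free(obj);	/* last holder does the cleanup */
}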

If the current process is the last holder of the file structure, the real cleanup is normally queued via task_work_add() to run when the task returns to user space. Failing that (interrupt context or a kernel thread), llist_add() hangs the file on the delayed_fput_list and schedules a work item to process it; llist_add() returns true only when the list was empty beforehand, so the work is scheduled once per batch. That work item, delayed_fput_work, is declared as:

static DECLARE_WORK(delayed_fput_work, delayed_fput);

So its work function is delayed_fput(), which walks delayed_fput_list and calls __fput() on each file to release and clean up the associated resources:

static LLIST_HEAD(delayed_fput_list);
static void delayed_fput(struct work_struct *unused)
{
	struct llist_node *node = llist_del_all(&delayed_fput_list);
	struct llist_node *next;

	for (; node; node = next) {
		next = llist_next(node);
		__fput(llist_entry(node, struct file, f_u.fu_llist));
	}
}

The source of __fput() is as follows:

static void __fput(struct file *file)
{
	struct dentry *dentry = file->f_path.dentry;
	struct vfsmount *mnt = file->f_path.mnt;
	struct inode *inode = file->f_inode;

	might_sleep();

	fsnotify_close(file);
	/*
	 * The function eventpoll_release() should be the first called
	 * in the file cleanup chain.
	 */
	eventpoll_release(file);
	locks_remove_flock(file);

	if (unlikely(file->f_flags & FASYNC)) {
		if (file->f_op && file->f_op->fasync)
			file->f_op->fasync(-1, file, 0);
	}
	ima_file_free(file);
	if (file->f_op && file->f_op->release)
		file->f_op->release(inode, file);
	security_file_free(file);
	if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
		     !(file->f_mode & FMODE_PATH))) {
		cdev_put(inode->i_cdev);
	}
	fops_put(file->f_op);
	put_pid(file->f_owner.pid);
	if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
		i_readcount_dec(inode);
	if (file->f_mode & FMODE_WRITE)
		drop_file_write_access(file);
	file->f_path.dentry = NULL;
	file->f_path.mnt = NULL;
	file->f_inode = NULL;
	file_free(file);
	dput(dentry);
	mntput(mnt);
}

Here file->f_op->release(inode, file) is the release callback from the socket's file_operations, i.e. sock_close(), which is responsible for releasing and cleaning up the socket's resources.
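
For completeness, and to close the loop on the opening question: a TCP FIN is generated only at the end of this release path, after the last reference has gone. For an AF_INET stream socket the chain continues roughly like this:

sock_close -------- net/socket.c
sock_release ------ net/socket.c
inet_release ------ net/ipv4/af_inet.c
tcp_close --------- net/ipv4/tcp.c (this is where the FIN is finally sent)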
