linux kernel source tree의 깃허브 코드를 참조해 시스템 콜 호출 시 변화 과정을 분석한 글입니다.

📌 unlink()

  • 파일 시스템에서 이름과 연결된 파일 (inode)에 대한 포인터를 제거하는 작업
  • 디렉터리 엔트리 삭제
  • 빈 디렉터리는 rmdir() 시스템콜 호출로 삭제, 일반 파일만 unlink()로 삭제

🫧 과정

alt text

  1. 경로 해석
  2. 권한 확인
  3. 파일 삭제
  4. inode 후처리

🫧 특징

  • 중간에 lookup() 함수로 탐색을 하며 dentry → inode를 연결함
    • 만약 dentry가 이미 존재하면 캐시된 dentry를 재활용함 (inode가 같을 수도, 다를 수도)

🫧 사용 예시 (코드)

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>

/*
 * Usage example: remove every path given on the command line with unlink(2).
 *
 * Prints a usage line to stderr and returns 1 when no path is given.
 * Otherwise each argument is unlinked in turn; a failure is reported on
 * stderr together with the strerror() text for errno, and processing
 * continues with the next argument. Returns 0 once every argument has been
 * processed.
 */
int main(int argc, char *argv[])
{
    int idx;

    if(argc == 1) {
        fprintf(stderr, "%s {file1} [file2] ...\n", argv[0]);
        return 1;
    }

    for(idx = 1; idx < argc; idx++) {
        if(unlink(argv[idx]) == -1) {
            /* strerror() (declared in <string.h>) turns errno into a message. */
            fprintf(stderr, "%s file 삭제 error: %s\n", argv[idx], strerror(errno));
        } else {
            printf("%s file이 삭제되었습니다.\n", argv[idx]);
        }
    }

    return 0;
}

🫧 코드

✨ SYSCALL_DEFINE1

  • /fs/namei.c, $4644
/*
 * unlink(2) entry point.
 *
 * Forwards to do_unlinkat() (examined in the next section) with AT_FDCWD as
 * the directory fd and the result of getname(pathname) as the name.
 */
SYSCALL_DEFINE1(unlink, const char __user *, pathname)
{
	return do_unlinkat(AT_FDCWD, getname(pathname));
}

✨ do_unlinkat()

  • /fs/namei.c, $4562
/*
 * Make sure that the actual truncation of the file will occur outside its
 * directory's i_mutex.  Truncate can take a long time if there is a lot of
 * writeout happening, and we don't want to prevent access to the directory
 * while waiting on the I/O.
 */
int do_unlinkat(int dfd, struct filename *name)
{
	int error;
	struct dentry *dentry;
	struct path path;
	struct qstr last;
	int type;
	struct inode *inode = NULL;
	struct inode *delegated_inode = NULL;
	unsigned int lookup_flags = 0;
retry:
	**// 1. 파일명을 부모 디렉터리와 파일명으로 분리**
	error = **filename_parentat(dfd, name, lookup_flags, &path, &last, &type)**;
	if (error)
		goto exit1;

	error = -EISDIR;
	if (type != LAST_NORM)
		goto exit2;
	
	**// 2. 쓰기 권한 획득**
	error = **mnt_want_write(path.mnt)**;
	if (error)
		goto exit2;
retry_deleg:
	**// 3. 디렉터리   삭제할 dentry 탐색**
	// 특정 inode에 대한 쓰기 락 획득
	inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
	// 삭제할 dentry 탐색
	dentry = **lookup_one_qstr_excl(&last, path.dentry, lookup_flags)**;
	error = PTR_ERR(dentry);
	if (!IS_ERR(dentry)) {

		/* Why not before? Because we want correct error value */
		if (last.name[last.len] || d_is_negative(dentry))
			goto slashes;
		inode = dentry->d_inode;
		ihold(inode);
		error = security_path_unlink(&path, dentry);
		if (error)
			goto exit3;
		**// 4. 실제 unlink 수행**
		error = **vfs_unlink(mnt_idmap(path.mnt), path.dentry->d_inode,
				   dentry, &delegated_inode)**;
exit3:
		dput(dentry);
	}
	inode_unlock(path.dentry->d_inode);
	// i_nlink = 0이 되며 trancate 발생
	// 필요 시 truncate 수행
	if (inode)
		iput(inode);	/* truncate the inode here */
	inode = NULL;
	if (delegated_inode) {
		error = break_deleg_wait(&delegated_inode);
		if (!error)
			goto retry_deleg;
	}
	mnt_drop_write(path.mnt);
exit2:
	path_put(&path);
	if (retry_estale(error, lookup_flags)) {
		lookup_flags |= LOOKUP_REVAL;
		inode = NULL;
		goto retry;
	}
exit1:
	putname(name);
	return error;

slashes:
	if (d_is_negative(dentry))
		error = -ENOENT;
	else if (d_is_dir(dentry))
		error = -EISDIR;
	else
		error = -ENOTDIR;
	goto exit3;
}

✨ filename_parentat()

  • /fs/namei.c, $2779
/*
 * Thin wrapper: calls __filename_parentat() (examined in the next section)
 * with the same arguments and NULL as the root.
 */
static int filename_parentat(int dfd, struct filename *name,
			     unsigned int flags, struct path *parent,
			     struct qstr *last, int *type)
{
	return __filename_parentat(dfd, name, flags, parent, last, type, NULL);
}

✨ __filename_parentat()

  • 전체 경로를 부모 디렉터리 + 파일명으로 분리

  • /fs/namei.c, $2703

/* Note: this does not consume "name" */
/*
 * Resolve the parent of @name. On success (return 0) *parent holds the
 * parent path and *last / *type describe the final component; otherwise the
 * error from the walk (or PTR_ERR(name) for an error-valued name) is
 * returned and the out-parameters are left untouched.
 */
static int __filename_parentat(int dfd, struct filename *name,
			       unsigned int flags, struct path *parent,
			       struct qstr *last, int *type,
			       const struct path *root)
{
	int retval;
	struct nameidata nd;

	if (IS_ERR(name))
		return PTR_ERR(name);
	// 1. Set up the nameidata structure.
	set_nameidata(&nd, dfd, name, root);

	// 2. Walk to the parent path, first with LOOKUP_RCU set.
	retval = path_parentat(&nd, flags | LOOKUP_RCU, parent);
	// -ECHILD: repeat the walk without LOOKUP_RCU.
	if (unlikely(retval == -ECHILD))
		retval = path_parentat(&nd, flags, parent);
	// -ESTALE: repeat the walk with LOOKUP_REVAL.
	if (unlikely(retval == -ESTALE))
		retval = path_parentat(&nd, flags | LOOKUP_REVAL, parent);
	if (likely(!retval)) {
		// Hand the last component and its type back to the caller.
		*last = nd.last;
		*type = nd.last_type;
		audit_inode(name, parent->dentry, AUDIT_INODE_PARENT);
	}
	restore_nameidata();
	return retval;
}

✨ path_parentat()

  • 경로 탐색 후 부모 path 반환

  • /fs/namei.c, $2687

/* Returns 0 and nd will be valid on success; Returns error, otherwise. */
static int path_parentat(struct nameidata *nd, unsigned flags,
				struct path *parent)
{
	// 1. Initialise the walk and resolve the path.
	const char *s = path_init(nd, flags); // covered in the open() write-up
	int err = link_path_walk(s, nd); // covered in the open() write-up

	// 2. Post-walk processing (leaving RCU mode, root-scope check, ...).
	if (!err)
		err = complete_walk(nd);
	if (!err) {
		// 3. Return the parent path.
		// unlink has to remove the entry from the parent directory, so
		// the caller needs the parent's path. Ownership moves to
		// *parent; nd->path is cleared before terminate_walk() runs.
		*parent = nd->path;
		nd->path.mnt = NULL;
		nd->path.dentry = NULL;
	}
	terminate_walk(nd);
	return err;
}

✨ complete_walk()

  • 경로 탐색 후 마지막 처리

  • /fs/namei.c, $942
  • RCU 잠금 해제, 루트 범위 확인 등 경로 탐색 후 처리
/*
 * Final step of a path walk.
 *
 * Returns 0 on success, -ECHILD when try_to_unlazy() fails while
 * LOOKUP_RCU is set, -EXDEV when a scoped lookup ended outside nd->root,
 * or the (negative) result of ->d_weak_revalidate(), with a result of 0
 * mapped to -ESTALE.
 */
static int complete_walk(struct nameidata *nd)
{
	struct dentry *dentry = nd->path.dentry;
	int status;

	/* Walk was done with LOOKUP_RCU set: try to leave that mode first. */
	if (nd->flags & LOOKUP_RCU) {
		/*
		 * We don't want to zero nd->root for scoped-lookups or
		 * externally-managed nd->root.
		 */
		if (!(nd->state & ND_ROOT_PRESET))
			if (!(nd->flags & LOOKUP_IS_SCOPED))
				nd->root.mnt = NULL;
		nd->flags &= ~LOOKUP_CACHED;
		if (!try_to_unlazy(nd))
			return -ECHILD;
	}

	if (unlikely(nd->flags & LOOKUP_IS_SCOPED)) {
		/*
		 * While the guarantee of LOOKUP_IS_SCOPED is (roughly) "don't
		 * ever step outside the root during lookup" and should already
		 * be guaranteed by the rest of namei, we want to avoid a namei
		 * BUG resulting in userspace being given a path that was not
		 * scoped within the root at some point during the lookup.
		 *
		 * So, do a final sanity-check to make sure that in the
		 * worst-case scenario (a complete bypass of LOOKUP_IS_SCOPED)
		 * we won't silently return an fd completely outside of the
		 * requested root to userspace.
		 *
		 * Userspace could move the path outside the root after this
		 * check, but as discussed elsewhere this is not a concern (the
		 * resolved file was inside the root at some point).
		 */
		if (!path_is_under(&nd->path, &nd->root))
			return -EXDEV;
	}

	/* Nothing more to do unless ND_JUMPED is set in nd->state. */
	if (likely(!(nd->state & ND_JUMPED)))
		return 0;

	/* Only dentries flagged DCACHE_OP_WEAK_REVALIDATE are revalidated. */
	if (likely(!(dentry->d_flags & DCACHE_OP_WEAK_REVALIDATE)))
		return 0;

	status = dentry->d_op->d_weak_revalidate(dentry, nd->flags);
	/* Positive result: success. */
	if (status > 0)
		return 0;

	/* Zero is reported as -ESTALE; negative values are returned as-is. */
	if (!status)
		status = -ESTALE;

	return status;
}

✨ mnt_want_write()

  • 쓰기 작업 가능 여부 확인 및 잠금 (언마운트 되지 않도록 보호)

  • /fs/namespace.c, $511

/*
 * Request write access to mount @m. Returns 0 on success; on failure the
 * error from mnt_get_write_access() is returned after the superblock write
 * section opened here has been closed again.
 */
int mnt_want_write(struct vfsmount *m)
{
	int ret;

	// 1. Announce the start of a write on the superblock.
	//    NOTE(review): the original note says this bumps a counter rather
	//    than taking a lock - confirm against sb_start_write().
	sb_start_write(m->mnt_sb);
	// 2. Request write access on the mount.
	ret = mnt_get_write_access(m);
	// Roll back step 1 on failure.
	if (ret)
		sb_end_write(m->mnt_sb);
	return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write);

✨ lookup_one_qstr_excl()

  • 삭제할 dentry 탐색

  • /fs/namei.c, $1673
  • dir→i_op→lookup() 호출을 통해 파일 시스템별 lookup() 함수 호출 (ext4의 경우 ext4_lookup())
/*
 * Parent directory has inode locked exclusive.  This is one
 * and only case when ->lookup() gets called on non in-lookup
 * dentries - as the matter of fact, this only gets called
 * when directory is guaranteed to have no in-lookup children
 * at all.
 */
struct dentry *lookup_one_qstr_excl(const struct qstr *name,
				    struct dentry *base,
				    unsigned int flags)
{
	// 1. Check whether the name is already present in the dentry cache.
	struct dentry *dentry = lookup_dcache(name, base, flags);
	struct dentry *old;
	struct inode *dir = base->d_inode;

	// Cache hit (or error pointer from lookup_dcache()): return it as-is.
	if (dentry)
		return dentry;

	/* Don't create child dentry for a dead directory. */
	// Fail with -ENOENT when the parent directory is marked dead.
	if (unlikely(IS_DEADDIR(dir)))
		return ERR_PTR(-ENOENT);

	// 2. Allocate a fresh dentry under the parent.
	dentry = d_alloc(base, name);
	if (unlikely(!dentry))
		return ERR_PTR(-ENOMEM);

	// 3. Ask the filesystem to look the name up (ext4: ext4_lookup()).
	//    A non-NULL result replaces the dentry allocated above.
	old = dir->i_op->lookup(dir, dentry, flags);
	if (unlikely(old)) {
		dput(dentry);
		dentry = old;
	}
	return dentry;
}
EXPORT_SYMBOL(lookup_one_qstr_excl);
const struct inode_operations ext4_dir_inode_operations = {
	.create		= ext4_create,
	**.lookup		= ext4_lookup**,
	.link		= ext4_link,
	.unlink		= ext4_unlink,
	.symlink	= ext4_symlink,
	.mkdir		= ext4_mkdir,
	.rmdir		= ext4_rmdir,
	.mknod		= ext4_mknod,
	.tmpfile	= ext4_tmpfile,
	.rename		= ext4_rename2,
	.setattr	= ext4_setattr,
	.getattr	= ext4_getattr,
	.listxattr	= ext4_listxattr,
	.get_inode_acl	= ext4_get_acl,
	.set_acl	= ext4_set_acl,
	.fiemap         = ext4_fiemap,
	.fileattr_get	= ext4_fileattr_get,
	.fileattr_set	= ext4_fileattr_set,
};
  • ext4 디렉터리용 inode_operations 구조체에 lookup 포함. 이를 토대로 찾아서 ext4_lookup() 함수 호출

✨ ext4_lookup()

  • dentry 이름 -> inode로 매핑

  • /fs/ext4/namei.c, $1788

static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
	struct inode *inode;
	struct ext4_dir_entry_2 *de;
	struct buffer_head *bh;

	// 이름 길이 체크
	if (dentry->d_name.len > EXT4_NAME_LEN)
		return ERR_PTR(-ENAMETOOLONG);

	**// 1. 디렉터리 엔트리 탐색**
	bh = **ext4_lookup_entry(dir, dentry, &de)**;
	if (IS_ERR(bh))
		return ERR_CAST(bh);
	inode = NULL;
	if (bh) {
		__u32 ino = le32_to_cpu(de->inode);
		brelse(bh);
		if (!ext4_valid_inum(dir->i_sb, ino)) {
			EXT4_ERROR_INODE(dir, "bad inode number: %u", ino);
			return ERR_PTR(-EFSCORRUPTED);
		}
		if (unlikely(ino == dir->i_ino)) {
			EXT4_ERROR_INODE(dir, "'%pd' linked to parent dir",
					 dentry);
			return ERR_PTR(-EFSCORRUPTED);
		}
		
		**// 2. inode 추출  검사**
		inode = ext4_iget(dir->i_sb, ino, EXT4_IGET_NORMAL);
		if (inode == ERR_PTR(-ESTALE)) {
			EXT4_ERROR_INODE(dir,
					 "deleted inode referenced: %u",
					 ino);
			return ERR_PTR(-EFSCORRUPTED);
		}
		if (!IS_ERR(inode) && IS_ENCRYPTED(dir) &&
		    (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
		    !fscrypt_has_permitted_context(dir, inode)) {
			ext4_warning(inode->i_sb,
				     "Inconsistent encryption contexts: %lu/%lu",
				     dir->i_ino, inode->i_ino);
			iput(inode);
			return ERR_PTR(-EPERM);
		}
	}

	if (IS_ENABLED(CONFIG_UNICODE) && !inode && IS_CASEFOLDED(dir)) {
		/* Eventually we want to call d_add_ci(dentry, NULL)
		 * for negative dentries in the encoding case as
		 * well.  For now, prevent the negative dentry
		 * from being cached.
		 */
		return NULL;
	}

	// 3. inode와 dentry 연결
	// 캐시된 dentry가 있을 경우 그걸 사용
	return d_splice_alias(inode, dentry);
}

✨ ext4_lookup_entry()

  • 특정 dentry를 찾아 해당 inode 번호 등의 정보 추출

  • /fs/ext4/namei.c, $1715

/*
 * Find the directory entry for @dentry's name in @dir.
 *
 * Returns the buffer_head from __ext4_find_entry() (with *res_dir passed
 * through to it), NULL when name preparation reports -ENOENT, or an
 * ERR_PTR for any other preparation error.
 */
static struct buffer_head *ext4_lookup_entry(struct inode *dir,
					     struct dentry *dentry,
					     struct ext4_dir_entry_2 **res_dir)
{
	int err;
	struct ext4_filename fname;
	struct buffer_head *bh;

	// 1. Prepare the name for lookup.
	err = ext4_fname_prepare_lookup(dir, dentry, &fname);
	if (err == -ENOENT)
		return NULL;
	if (err)
		return ERR_PTR(err);

	// 2. Search for the entry.
	bh = __ext4_find_entry(dir, &fname, res_dir, NULL);

	// 3. Release what step 1 set up.
	ext4_fname_free_filename(&fname);
	return bh;
}
✨ vfs_unlink()

  • 파일 삭제 함수

  • /fs/namei.c, $4511

/**
 * vfs_unlink - unlink a filesystem object
 * @idmap:	idmap of the mount the inode was found from
 * @dir:	parent directory
 * @dentry:	victim
 * @delegated_inode: returns victim inode, if the inode is delegated.
 *
 * The caller must hold dir->i_mutex.
 *
 * If vfs_unlink discovers a delegation, it will return -EWOULDBLOCK and
 * return a reference to the inode in delegated_inode.  The caller
 * should then break the delegation on that inode and retry.  Because
 * breaking a delegation may take a long time, the caller should drop
 * dir->i_mutex before doing so.
 *
 * Alternatively, a caller may pass NULL for delegated_inode.  This may
 * be appropriate for callers that expect the underlying filesystem not
 * to be NFS exported.
 *
 * If the inode has been found through an idmapped mount the idmap of
 * the vfsmount must be passed through @idmap. This function will then take
 * care to map the inode according to @idmap before checking permissions.
 * On non-idmapped mounts or if permission checking is to be performed on the
 * raw inode simply pass @nop_mnt_idmap.
 */
int vfs_unlink(struct mnt_idmap *idmap, struct inode *dir,
	       struct dentry *dentry, struct inode **delegated_inode)
{
	struct inode *target = dentry->d_inode;
	
	**// 1. 삭제 권한 검사**
	int error = may_delete(idmap, dir, dentry, 0);

	if (error)
		return error;

	if (!dir->i_op->unlink)
		return -EPERM;

	**// 2.  걸기**
	inode_lock(target);
	if (IS_SWAPFILE(target))
		error = -EPERM;
	else if (is_local_mountpoint(dentry))
		error = -EBUSY;
	else {
		error = security_inode_unlink(dir, dentry);
		if (!error) {
			error = try_break_deleg(target, delegated_inode);
			if (error)
				goto out;
			**// 3. 삭제**
			error = **dir->i_op->unlink(dir, dentry)**;
			if (!error) {
				dont_mount(dentry);
				detach_mounts(dentry);
			}
		}
	}
out:
	inode_unlock(target);

	/* We don't d_delete() NFS sillyrenamed files--they still exist. */
	if (!error && dentry->d_flags & DCACHE_NFSFS_RENAMED) {
		fsnotify_unlink(dir, dentry);
	} else if (!error) {
		**// 4. inode 링크  변경  해당 dentry 삭제를 알림**
		fsnotify_link_count(target);
		d_delete_notify(dir, dentry);
	}

	return error;
}
EXPORT_SYMBOL(vfs_unlink);
✨ ext4_unlink()

  • dir 디렉터리 내 dentry -> d_name 삭제 함수

  • fs/ext4/namei.c, $3308

/*
 * ext4 ->unlink(): remove the name @dentry->d_name from directory @dir.
 *
 * Returns -EIO when the filesystem has been force-shut-down, an error from
 * dquot_initialize(), or the result of __ext4_unlink() (examined in the
 * next section).
 */
static int ext4_unlink(struct inode *dir, struct dentry *dentry)
{
	int retval;

	if (unlikely(ext4_forced_shutdown(dir->i_sb)))
		return -EIO;

	trace_ext4_unlink_enter(dir, dentry);
	/*
	 * Initialize quotas before so that eventual writes go
	 * in separate transaction
	 */
	retval = dquot_initialize(dir);
	if (retval)
		goto out_trace;
	retval = dquot_initialize(d_inode(dentry));
	if (retval)
		goto out_trace;

	// The actual removal of the entry.
	retval = __ext4_unlink(dir, &dentry->d_name, d_inode(dentry), dentry);

	/* VFS negative dentries are incompatible with Encoding and
	 * Case-insensitiveness. Eventually we'll want avoid
	 * invalidating the dentries here, alongside with returning the
	 * negative dentries at ext4_lookup(), when it is  better
	 * supported by the VFS for the CI case.
	 */
	if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
		d_invalidate(dentry);

out_trace:
	trace_ext4_unlink_exit(dentry, retval);
	return retval;
}
✨ __ext4_unlink()

  • dentry 검색 및 제거, inode 변경사항 기록

  • fs/ext4/namei.c, $3235

int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
		  struct inode *inode,
		  struct dentry *dentry /* NULL during fast_commit recovery */)
{
	int retval = -ENOENT;
	struct buffer_head *bh;
	struct ext4_dir_entry_2 *de;
	handle_t *handle;
	int skip_remove_dentry = 0;

	/*
	 * Keep this outside the transaction; it may have to set up the
	 * directory's encryption key, which isn't GFP_NOFS-safe.
	 */
	 
	**// 1. dentry 검색**
	bh = ext4_find_entry(dir, d_name, &de, NULL);
	if (IS_ERR(bh))
		return PTR_ERR(bh);

	if (!bh)
		return -ENOENT;

	**// 2. 찾은 디렉터리 엔트리의 inode 삭제 대상 inode 일치하는지 검사**
	if (le32_to_cpu(de->inode) != inode->i_ino) {
		/*
		 * It's okay if we find dont find dentry which matches
		 * the inode. That's because it might have gotten
		 * renamed to a different inode number
		 */
		
		if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
			skip_remove_dentry = 1;
		else
			goto out_bh;
	}
	
	**// 3. 저널 트랜잭션 시작**
	handle = ext4_journal_start(dir, EXT4_HT_DIR, // 분석하고 싶었으나 시간이 부족해..
				    EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
	if (IS_ERR(handle)) {
		retval = PTR_ERR(handle);
		goto out_bh;
	}

	if (IS_DIRSYNC(dir))
		ext4_handle_sync(handle);

	if (!skip_remove_dentry) {
		**// 4. 디렉터리 엔트리 제거**
		retval = **ext4_delete_entry(handle, dir, de, bh)**;
		if (retval)
			goto out_handle;
		inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
		ext4_update_dx_flag(dir);
		retval = ext4_mark_inode_dirty(handle, dir);
		if (retval)
			goto out_handle;
	} else {
		retval = 0;
	}
	**// 5. 링크 --  orphan 처리**
	if (inode->i_nlink == 0)
		ext4_warning_inode(inode, "Deleting file '%.*s' with no links",
				   d_name->len, d_name->name);
	else
		drop_nlink(inode);
	if (!inode->i_nlink)
		ext4_orphan_add(handle, inode);
	inode_set_ctime_current(inode);
	
	**// 6. inode 변경 사항 기록 (저널에 반영)**
	retval = ext4_mark_inode_dirty(handle, inode);
	if (dentry && !retval)
		ext4_fc_track_unlink(handle, dentry);
out_handle:
	ext4_journal_stop(handle);
out_bh:
	brelse(bh);
	return retval;
}

✨ ext4_delete_entry()

  • dentry 제거 및 inode=0 세팅

  • /fs/ext4/namei.c, $2721

/*
 * Remove directory entry @de_del, located in buffer @bh, from @dir under
 * journal handle @handle.
 *
 * Returns 0 on success or the error of the failing step; errors other
 * than -ENOENT on the non-inline path are also reported through
 * ext4_std_error().
 */
static int ext4_delete_entry(handle_t *handle,
			     struct inode *dir,
			     struct ext4_dir_entry_2 *de_del,
			     struct buffer_head *bh)
{
	int err, csum_size = 0;

	// 1. Inline-data directories are handled by
	//    ext4_delete_inline_entry() (presumably the entries live inside
	//    the inode itself - per the original note). Fall through only if
	//    it clears has_inline_data.
	if (ext4_has_inline_data(dir)) {
		int has_inline_data = 1;
		err = ext4_delete_inline_entry(handle, dir, de_del, bh,
					       &has_inline_data);
		if (has_inline_data)
			return err;
	}

	// Leave room for the checksum tail when metadata checksums are on.
	if (ext4_has_metadata_csum(dir->i_sb))
		csum_size = sizeof(struct ext4_dir_entry_tail);

	BUFFER_TRACE(bh, "get_write_access");
	// 2. Ask the journal for write access to the block; without it the
	//    block must not be modified.
	err = ext4_journal_get_write_access(handle, dir->i_sb, bh,
					    EXT4_JTR_NONE);
	if (unlikely(err))
		goto out;

	// 3. Remove the entry from the block.
	//    NOTE(review): the original note says the entry's inode field is
	//    set to 0 - confirm against ext4_generic_delete_entry().
	err = ext4_generic_delete_entry(dir, de_del, bh, bh->b_data,
					dir->i_sb->s_blocksize, csum_size);
	if (err)
		goto out;

	BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
	// 4. Mark the modified directory block dirty under the handle.
	err = ext4_handle_dirty_dirblock(handle, dir, bh);
	if (unlikely(err))
		goto out;

	return 0;
out:
	if (err != -ENOENT)
		ext4_std_error(dir->i_sb, err);
	return err;
}

🫧 참고 자료

카테고리:

업데이트: