Skip to the content.

总结一下各种虚拟文件系统

  1. debugfs
  2. tracefs

fs/seq_file.c

nodev fs

tmp proc sysfs ramfs 和 ext2 fs 应该可以作为两个典型

怀疑这个分类有点问题,正确的分类应该参考几个 mount 函数

kernfs 也是利用 seq 中间的内容,那么 libfs 和 kernfs 的区别在于什么地方呀 ?

通过 ramfs 理解 libfs 吧 !

trace fs

增加了 fs/tracefs/event_inode.c 来实现 inode / dentry 的动态分配,

find /sys/kernel/debug/tracing -type f | wc -l

正如

kernfs

到底谁在依赖 kernfs ,通过搜索 select KERNFS 可以找到 sysfs cgroup psi 是通过 kernfs 来构建的。

从这个看起

🧀 cat /proc/mounts | awk '{ print $3 }' devtmpfs devpts tmpfs proc tmpfs ramfs tmpfs sysfs ext4 securityfs cgroup2 pstore efivarfs bpf mqueue hugetlbfs debugfs nfsd configfs fusectl vfat rpc_pipefs tmpfs fuse.gvfsd-fuse fuse.portal overlay nsfs

debugfs

https://docs.kernel.org/filesystems/debugfs.html

ramfs

fs/ramfs/

有趣之处在于:

  1. ramfs 需要考虑 nommu
  2. ramfs 真的被使用过吗?

记得使用的不是 ramfs ,而是 tmpfs 才对

ramfs 真的被使用过吗? 所以和 init/initramfs.c 是什么关系?

现在想想,initramfs 的代码为什么无法使用

之前记录的内容:

If CONFIG_TMPFS is enabled, rootfs will use tmpfs instead of ramfs by default. To force ramfs, add “rootfstype=ramfs” to the kernel command line.

Rootfs is a special instance of ramfs (or tmpfs, if that’s enabled), which is always present in 2.6 systems. You can’t unmount rootfs for approximately the same reason you can’t kill the init process;

ramfs 总是 builtin 的:

obj-y				+= ramfs/

ramfs

显然,启动时是由 grub 同时加载 kernel 和 initrd.img 的,initrd.img 中包含了各种驱动(例如 nvme 之类的),用来实现真正的 root fs 的 mount。

virtual fs

各种 fs 的区别,内核文档[2] 和这个 blog[1] 已经分析过一次,我就不重复了,下面是我的总结。

ramdisk 使用 ram 模拟 disk,这种方式已经被抛弃了。

fs explanation
ramfs ramdisk 的改进,大小可以伸缩,文件数据直接存放在 page cache 中,无需像 ramdisk 那样在块设备和 page cache 之间额外拷贝一份
tmpfs 基于 ramfs 的 virtual fs, fs 一般的存储介质是 disk / ssd,tmpfs 的存储介质是内存

rootfs 是 系统启动的时候的临时 fs ,rootfs 的实现为 ramfs 或者 tmpfs, 用于挂载 real root fs

kernel 启动的时候,可以指定 rootfs 中存储的内容,从而在 mount real root fs 之前完成一些准备工作(例如加载挂载 real root fs 所需的驱动)。

内容 差别
initrd 老版本
initramfs initrd 的改进版本

这里空说,实际上难以感受在说啥

将 hack/qemu/bare-metal/ 下的内容整理一下吧!

分析一下应用

问题

为什么 ramfs 还是被使用了

[  162.016999]  <TASK>
[  162.016999]  dump_stack_lvl+0x34/0x48
[  162.016999]  ramfs_create+0x16/0x32
[  162.016999]  path_openat+0xda8/0xfc0
[  162.016999]  ? write_buffer+0x36/0x36
[  162.016999]  do_filp_open+0xad/0x150
[  162.016999]  ? init_stat+0x2e/0x7a
[  162.016999]  ? preempt_count_add+0x48/0xa0
[  162.016999]  file_open_name+0xec/0x1b0
[  162.016999]  filp_open+0x27/0x50
[  162.016999]  do_name+0xbf/0x282
[  162.016999]  write_buffer+0x22/0x36
[  162.016999]  flush_buffer+0x26/0x82
[  162.016999]  ? initrd_load+0x3e/0x3e
[  162.016999]  __gunzip+0x28a/0x313
[  162.016999]  ? bunzip2+0x3ae/0x3ae
[  162.016999]  gunzip+0xe/0x15
[  162.016999]  ? initrd_load+0x3e/0x3e
[  162.016999]  unpack_to_rootfs+0x155/0x28d
[  162.016999]  ? initrd_load+0x3e/0x3e
[  162.016999]  do_populate_rootfs+0x54/0x106
[  162.016999]  async_run_entry_fn+0x18/0xa0
[  162.016999]  process_one_work+0x1d4/0x3a0
[  162.016999]  worker_thread+0x48/0x3c0
[  162.016999]  ? rescuer_thread+0x380/0x380
[  162.016999]  kthread+0xe0/0x110
[  162.016999]  ? kthread_complete_and_exit+0x20/0x20
[  162.016999]  ret_from_fork+0x1f/0x30
[  162.016999]  </TASK>
/*
 * file_system_type descriptor for the filesystem named "rootfs".
 * NOTE(review): presumably rootfs_init_fs_context is where the ramfs vs.
 * tmpfs backing implementation is selected - confirm against init/do_mounts.c.
 */
struct file_system_type rootfs_fs_type = {
    .name       = "rootfs",
    /* Callback used to set up the fs_context for a rootfs mount. */
    .init_fs_context = rootfs_init_fs_context,
    /* Superblock teardown is delegated to kill_litter_super. */
    .kill_sb    = kill_litter_super,
};

观测一下 ramfs 和 shmem 的选择过程:

简单分析 init/initramfs 中的代码

#0  populate_rootfs () at init/initramfs.c:755
#1  0xffffffff81000e7c in do_one_initcall (fn=0xffffffff832e4a0e <populate_rootfs>) at init/main.c:1296
#2  0xffffffff832e3491 in do_initcall_level (command_line=0xffff8882000fd3c0 "root", level=5) at init/main.c:1369
#3  do_initcalls () at init/main.c:1385
#4  do_basic_setup () at init/main.c:1404
#5  kernel_init_freeable () at init/main.c:1611
#6  0xffffffff81edeae1 in kernel_init (unused=<optimized out>) at init/main.c:1500
#7  0xffffffff81001a8f in ret_from_fork () at arch/x86/entry/entry_64.S:306
#8  0x0000000000000000 in ?? ()
#0  do_populate_rootfs (unused=0x0 <fixed_percpu_data>, cookie=1) at init/initramfs.c:699
#1  0xffffffff8112ed88 in async_run_entry_fn (work=0xffff8881212f7320) at kernel/async.c:127
#2  0xffffffff811225e4 in process_one_work (worker=worker@entry=0xffff888100050b40, work=0xffff8881212f7320) at kernel/workqueue.c:2289
#3  0xffffffff81122b78 in worker_thread (__worker=0xffff888100050b40) at kernel/workqueue.c:2436
#4  0xffffffff81129520 in kthread (_create=0xffff888100164040) at kernel/kthread.c:376
#5  0xffffffff81001a8f in ret_from_fork () at arch/x86/entry/entry_64.S:306
#6  0x0000000000000000 in ?? ()
config BLK_DEV_INITRD
    bool "Initial RAM filesystem and RAM disk (initramfs/initrd) support"
    help
      The initial RAM filesystem is a ramfs which is loaded by the
      boot loader (loadlin or lilo) and that is mounted as root
      before the normal boot procedure. It is typically used to
      load modules needed to mount the "real" root file system,
      etc. See <file:Documentation/admin-guide/initrd.rst> for details.

      If RAM disk support (BLK_DEV_RAM) is also included, this
      also enables initial RAM disk (initrd) support and adds
      15 Kbytes (more on some other architectures) to the kernel size.

      If unsure say Y.

但是,这是第一个:

#0  shmem_create (mnt_userns=0xffffffff82a61920 <init_user_ns>, dir=0xffff88822127b090, dentry=0xffff888222c950c0, mode=33188, excl=false) at mm/shmem.c:2952
#1  0xffffffff8135a408 in lookup_open (op=0xffffc9000005fedc, op=0xffffc9000005fedc, got_write=true, file=0xffff888221cb2a00, nd=0xffffc9000005fdc0) at fs/namei.c:3413
#2  open_last_lookups (op=0xffffc9000005fedc, file=0xffff888221cb2a00, nd=0xffffc9000005fdc0) at fs/namei.c:3481
#3  path_openat (nd=nd@entry=0xffffc9000005fdc0, op=op@entry=0xffffc9000005fedc, flags=flags@entry=65) at fs/namei.c:3688
#4  0xffffffff8135b57d in do_filp_open (dfd=dfd@entry=-100, pathname=pathname@entry=0xffff888221191000, op=op@entry=0xffffc9000005fedc) at fs/namei.c:3718
#5  0xffffffff81345145 in do_sys_openat2 (dfd=dfd@entry=-100, filename=<optimized out>, how=how@entry=0xffffc9000005ff18) at fs/open.c:1311
#6  0xffffffff81345520 in do_sys_open (mode=<optimized out>, flags=<optimized out>, filename=<optimized out>, dfd=-100) at fs/open.c:1327
#7  __do_sys_open (mode=<optimized out>, flags=<optimized out>, filename=<optimized out>) at fs/open.c:1335
#8  __se_sys_open (mode=<optimized out>, flags=<optimized out>, filename=<optimized out>) at fs/open.c:1331
#9  __x64_sys_open (regs=<optimized out>) at fs/open.c:1331
#10 0xffffffff81ed9c38 in do_syscall_x64 (nr=<optimized out>, regs=0xffffc9000005ff58) at arch/x86/entry/common.c:50
#11 do_syscall_64 (regs=0xffffc9000005ff58, nr=<optimized out>) at arch/x86/entry/common.c:80
#12 0xffffffff8200009b in entry_SYSCALL_64 () at arch/x86/entry/entry_64.S:120

seq_file.c

内核文档

文档写的有点复杂,简单来说:

  1. next 移动指针,show 展示内容,让内核可以方便地把一个数组或者链表之类的东西导出到用户态空间。

  2. 这里各种东西的辅助函数,似乎划分为 file operations 和 seq hlist 两个类别的。
  3. struct seq_file 如何初始化的 ? 谁来使用

seq_read

  1. seq_read is a generic function that takes advantage of seq_operations
    1. seq_operations is registered in seq_open
    2. here is the general call graph:
      1. vfs -> debugfs_kprobes_operations::open -> seq_open -> register kprobes_seq_ops
      2. vfs -> debugfs_kprobes_operations::read -> seq_read -> kprobes_seq_ops::show_kprobe_addr

seq_open : seq_open is simple, but its comment describes how seq_operations works

/**
 *	seq_open -	initialize sequential file
 *	@file: file we initialize
 *	@op: method table describing the sequence
 *
 *	seq_open() sets @file, associating it with a sequence described
 *	by @op.  @op->start() sets the iterator up and returns the first
 *	element of sequence. @op->stop() shuts it down.  @op->next()
 *	returns the next element of sequence.  @op->show() prints element
 *	into the buffer.  In case of error ->start() and ->next() return
 *	ERR_PTR(error).  In the end of sequence they return %NULL. ->show()
 *	returns 0 in case of success and negative number in case of error.
 *	Returning SEQ_SKIP means "discard this element and move on".
 *	Note: seq_open() will allocate a struct seq_file and store its
 *	pointer in @file->private_data. This pointer should not be modified.
 */
int seq_open(struct file *file, const struct seq_operations *op)

core struct

/*
 * Per-open state of a sequential file. seq_open() allocates one of these
 * and stores its pointer in file->private_data.
 */
struct seq_file {
	/* Output buffer that helpers such as seq_puts() append into. */
	char *buf;
	/* Capacity of buf; seq_puts() checks count + len against it. */
	size_t size;
	size_t from;
	/* Number of bytes currently written into buf. */
	size_t count;
	size_t pad_until;
	loff_t index;
	loff_t read_pos;
	u64 version;
	struct mutex lock;
	/* Method table describing the sequence (presumably the @op given to seq_open() - confirm). */
	const struct seq_operations *op;
	int poll_event;
	const struct file *file;
	void *private;
};

/*
 * Iterator callbacks describing a sequence to the seq_file code.
 * Per the seq_open() documentation: ->start() and ->next() return
 * ERR_PTR(error) on failure and NULL at the end of the sequence; ->show()
 * returns 0 on success, a negative number on error, and SEQ_SKIP means
 * "discard this element and move on".
 */
struct seq_operations {
	/* Set the iterator up and return the first element of the sequence. */
	void * (*start) (struct seq_file *m, loff_t *pos);
	/* Shut the iterator down. */
	void (*stop) (struct seq_file *m, void *v);
	/* Return the next element of the sequence. */
	void * (*next) (struct seq_file *m, void *v, loff_t *pos);
	/* Print the element into the buffer. */
	int (*show) (struct seq_file *m, void *v);
};

seq_puts : seq_file maintains a buffer, seq_read will copy the buffer to userland !

/*
 * seq_puts - append a NUL-terminated string to a seq_file's buffer
 * @m: seq_file whose buffer receives the text
 * @s: string to append (the terminating NUL is not copied)
 *
 * If the string does not fit, nothing is copied and the seq_file is
 * flagged via seq_set_overflow().
 */
void seq_puts(struct seq_file *m, const char *s)
{
	int len = strlen(s);

	/*
	 * ">=" rather than ">": a write that would exactly fill the buffer
	 * is also treated as an overflow.
	 */
	if (m->count + len >= m->size) {
		seq_set_overflow(m);
		return;
	}
	/* Append at the current fill position and advance it. */
	memcpy(m->buf + m->count, s, len);
	m->count += len;
}
EXPORT_SYMBOL(seq_puts);

记录一个小问题

/*
 * seq_file ->show() callback: print one line per block device found in the
 * partition table of the gendisk passed in @v.
 *
 * Each line carries major, minor, a size value and the device name. Always
 * returns 0.
 */
static int show_partition(struct seq_file *seqf, void *v)
{
	struct gendisk *sgp = v;
	struct block_device *part;
	unsigned long idx;

	/* Print nothing for disks with zero capacity or flagged GENHD_FL_HIDDEN. */
	if (!get_capacity(sgp) || (sgp->flags & GENHD_FL_HIDDEN))
		return 0;

	/* The partition table is walked inside an RCU read-side section. */
	rcu_read_lock();
	xa_for_each(&sgp->part_tbl, idx, part) {
		/* Skip entries that report zero sectors. */
		if (!bdev_nr_sectors(part))
			continue;
		/*
		 * The sector count is halved for display; presumably 512-byte
		 * sectors converted to 1 KiB blocks - TODO confirm.
		 * %pg takes the struct block_device pointer itself and prints
		 * the device name.
		 */
		seq_printf(seqf, "%4d  %7d %10llu %pg\n",
			   MAJOR(part->bd_dev), MINOR(part->bd_dev),
			   bdev_nr_sectors(part) >> 1, part);
	}
	rcu_read_unlock();
	return 0;
}

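为什么 %pg 输出的是 partition 的 name ,而且传入的 part 是 struct block_device * 而不是 name 啊?原因是 %pg 是内核 printk 专门用于 struct block_device 指针的格式符,会直接打印出该块设备的名字(例如 sda、sda1)。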

major minor  #blocks  name

 259        0 1000204632 nvme1n1
 259        1 1000203264 nvme1n1p1
 259        2 1000204632 nvme0n1
 259        3  984080384 nvme0n1p1
 259        4   15624192 nvme0n1p2
 259        5     498688 nvme0n1p3
   8        0 1953514584 sda
   8        1 1953513472 sda1

本站所有文章转发 CSDN 将按侵权追究法律责任,其它情况随意。

  1. http://junyelee.blogspot.com/2020/03/ramfs-rootfs-and-initramfs.html 

  2. https://docs.kernel.org/filesystems/ramfs-rootfs-initramfs.html