Skip to the content.

softirq

softirq 是内核中非常重要的一个议题,基本上已经被反反复复地分析过了。但是我看完各种教程之后再去看对应的代码,总还是感觉迷迷糊糊的,所以在这里总结一下我的理解。

执行的位置

#0  __do_softirq () at kernel/softirq.c:529
#1  0xffffffff81132cca in invoke_softirq () at kernel/softirq.c:445
#2  __irq_exit_rcu () at kernel/softirq.c:650
#3  0xffffffff821df836 in sysvec_apic_timer_interrupt (regs=0xffffc90000093e38) at arch/x86/kernel/apic/apic.c:1107

等到 hardware irq 几乎结束的时候,在 __do_softirq 中,才会打开中断,这些 softirq 的 action 继续执行在 interrupt stack 上。

深入理解一下,为什么需要 spin_lock_bh

/*
 * Take @lock through the _bh variant: forwards to raw_spin_lock_bh() on the
 * rlock member of the spinlock_t.
 * NOTE(review): per the accompanying note, the _bh variant masks softirqs
 * before taking the lock -- confirm against raw_spin_lock_bh().
 */
static __always_inline void spin_lock_bh(spinlock_t *lock)
{
    raw_spin_lock_bh(&lock->rlock);
}

执行的内容:屏蔽 softirq,然后 spin lock

有那么多中断,最后都汇集在 softirq 上,如何区分的?

以网络为例,系统中其实可以存在很多网卡,但是只有一个 softirq。

/*
 * Complete @rq: first offer it to blk_mq_complete_request_remote(); only if
 * that returns false is the queue's mq_ops->complete() callback invoked
 * directly from the current context.
 * NOTE(review): presumably a true return means the completion was handed
 * off elsewhere (e.g. deferred to a softirq) -- confirm in the helper.
 */
void blk_mq_complete_request(struct request *rq)
{
	if (!blk_mq_complete_request_remote(rq))
		rq->q->mq_ops->complete(rq);
}

这个函数可以解释。

基本流程

触发:

@[
    trigger_load_balance+1
    update_process_times+134
    tick_sched_handle+34
    tick_sched_timer+113
    __hrtimer_run_queues+271
    hrtimer_interrupt+262
    __sysvec_apic_timer_interrupt+124
    sysvec_apic_timer_interrupt+157
    asm_sysvec_apic_timer_interrupt+22
    cpuidle_enter_state+222
    cpuidle_enter+41
    do_idle+492
    cpu_startup_entry+25
    start_secondary+271
    secondary_startup_64_no_verify+224
]: 14144

执行 hook 的时间:

@[
    run_rebalance_domains+1
    __softirqentry_text_start+237
    __irq_exit_rcu+216
    sysvec_apic_timer_interrupt+162
    asm_sysvec_apic_timer_interrupt+22
    cpuidle_enter_state+222
    cpuidle_enter+41
    do_idle+492
    cpu_startup_entry+25
    start_secondary+271
    secondary_startup_64_no_verify+224
]: 1228

TODO

Notes

softirq_action 可能是紧跟着 hardirq 执行的,也可能是在 ksoftirqd 内核线程中执行的。

/*
 * Init-time setup for the softirq threads.
 *
 * Registers the CPUHP_SOFTIRQ_DEAD hotplug state (named "softirq:dead"),
 * passing NULL and takeover_tasklets as its two callbacks (presumably the
 * startup and teardown hooks -- TODO confirm), then registers the per-CPU
 * softirq_threads.
 *
 * Returns 0; there is no error return path.
 */
static __init int spawn_ksoftirqd(void)
{
    cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
                  takeover_tasklets);
    /* A non-zero result from the per-CPU thread registration trips BUG_ON. */
    BUG_ON(smpboot_register_percpu_thread(&softirq_threads));

    return 0;
}

一个小证据,从 nvme 到 softirq : 在 queue_request_irq 注册 irq handler 为 nvme_irq

/*
 * Raise softirq @nr via __raise_softirq_irqoff() and, when not called from
 * interrupt context, wake ksoftirqd so the softirq gets to run soon.
 *
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
    __raise_softirq_irqoff(nr);

    /*
     * If we're in an interrupt or softirq, we're done
     * (this also catches softirq-disabled code). We will
     * actually run the softirq once we return from
     * the irq or softirq.
     *
     * Otherwise we wake up ksoftirqd to make sure we
     * schedule the softirq soon.
     */
    if (!in_interrupt())
        wakeup_softirqd();
}

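Pending softirqs are checked for and executed in the following places:

- In the return from hardware interrupt code path
- In the ksoftirqd kernel thread
- In any code that explicitly checks for and executes pending softirqs, such as the networking subsystem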

LKD chapter 8

softirq 一定是在中断上下文中执行吗?

需要观察一下从中断上下文到 task 的切换

rcu_read_lock_bh

/*
 * Re-enable bottom halves.
 *
 * Subtracts @cnt from the preempt count in two steps: (cnt - 1) first, and
 * the remaining 1 only after pending softirqs have had a chance to run.
 * When not in interrupt context and softirqs are pending, they are run
 * synchronously through do_softirq() before the final decrement.
 *
 * @ip:  forwarded to lockdep_softirqs_on() (presumably the caller's
 *       instruction pointer for lockdep reporting -- TODO confirm).
 * @cnt: amount to remove from the preempt count.
 */
void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
{
    /* Warn once if called from hardirq context; irqs are asserted enabled. */
    WARN_ON_ONCE(in_hardirq());
    lockdep_assert_irqs_enabled();
    /*
     * Are softirqs going to be turned on now:
     */
    if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
        lockdep_softirqs_on(ip);
    /*
     * Keep preemption disabled until we are done with
     * softirq processing:
     */
    __preempt_count_sub(cnt - 1);

    if (unlikely(!in_interrupt() && local_softirq_pending())) {
        /*
         * Run softirq if any pending. And do it in its own stack
         * as we may be calling this deep in a task call stack already.
         */
        do_softirq();
    }

    /* Drop the one count held back above, then check for a reschedule. */
    preempt_count_dec();
    preempt_check_resched();
}

让我多看点资料吧

RPS : 将 softirq 迁移到其他的 CPU 中

softirq 将 lock 迁移的位置有什么关系哇

[ ] 无法理解为什么 lock 需要考虑 softirq

[ ] 运行 softirq 的 stack 是在哪里

区分 do_softirq 和 raise_softirq

@[
    raise_softirq+1
    trigger_load_balance+131
    update_process_times+176
    tick_sched_handle+52
    tick_sched_timer+122
    __hrtimer_run_queues+298
    hrtimer_interrupt+252
    __sysvec_apic_timer_interrupt+92
    sysvec_apic_timer_interrupt+55
    asm_sysvec_apic_timer_interrupt+18
]: 217
@[
    do_softirq+1
    __local_bh_enable_ip+75
    ip_finish_output2+399
    __ip_queue_xmit+370
    __tcp_transmit_skb+2600
    tcp_write_xmit+909
    tcp_sendmsg_locked+716
    tcp_sendmsg+40
    sock_sendmsg+87
    sock_write_iter+151
    new_sync_write+409
    vfs_write+462
    ksys_write+167
    do_syscall_64+59
    entry_SYSCALL_64_after_hwframe+68
]: 334
@[
    raise_softirq+1
    rcu_sched_clock_irq+337
    update_process_times+140
    tick_sched_handle+52
    tick_sched_timer+122
    __hrtimer_run_queues+298
    hrtimer_interrupt+252
    __sysvec_apic_timer_interrupt+92
    sysvec_apic_timer_interrupt+55
    asm_sysvec_apic_timer_interrupt+18
]: 352
@[
    raise_softirq+1
    trigger_load_balance+131
    update_process_times+176
    tick_sched_handle+52
    tick_sched_timer+122
    __hrtimer_run_queues+298
    hrtimer_interrupt+252
    __sysvec_apic_timer_interrupt+92
    sysvec_apic_timer_interrupt+109
    asm_sysvec_apic_timer_interrupt+18
    native_safe_halt+11
    __cpuidle_text_start+10
    default_idle_call+53
    do_idle+501
    cpu_startup_entry+25
    secondary_startup_64_no_verify+194
]: 451
@[
    raise_softirq+1
    update_process_times+133
    tick_sched_handle+52
    tick_sched_timer+122
    __hrtimer_run_queues+298
    hrtimer_interrupt+252
    __sysvec_apic_timer_interrupt+92
    sysvec_apic_timer_interrupt+109
    asm_sysvec_apic_timer_interrupt+18
    native_safe_halt+11
    __cpuidle_text_start+10
    default_idle_call+53
    do_idle+501
    cpu_startup_entry+25
    secondary_startup_64_no_verify+194
]: 873
@[
    raise_softirq+1
    rcu_sched_clock_irq+337
    update_process_times+140
    tick_sched_handle+52
    tick_sched_timer+122
    __hrtimer_run_queues+298
    hrtimer_interrupt+252
    __sysvec_apic_timer_interrupt+92
    sysvec_apic_timer_interrupt+109
    asm_sysvec_apic_timer_interrupt+18
    native_safe_halt+11
    __cpuidle_text_start+10
    default_idle_call+53
    do_idle+501
    cpu_startup_entry+25
    secondary_startup_64_no_verify+194
]: 1832

看一个来自存储的 softirq 场景

#0  reschedule_retry (r1_bio=0xffff888172f0d080) at drivers/md/raid1.c:279
#1  0xffffffff8174bb11 in req_bio_endio (error=0 '\000', nbytes=65536, bio=0xffff888172d78600, rq=0xffff8881067d7280) at block/blk-mq.c:795
#2  blk_update_request (req=req@entry=0xffff8881067d7280, error=error@entry=0 '\000', nr_bytes=196608, nr_bytes@entry=262144) at block/blk-mq.c:927
#3  0xffffffff81ba1d47 in scsi_end_request (req=req@entry=0xffff8881067d7280, error=error@entry=0 '\000', bytes=bytes@entry=262144) at drivers/scsi/scsi_lib.c:538
#4  0xffffffff81ba288e in scsi_io_completion (cmd=0xffff8881067d7388, good_bytes=262144) at drivers/scsi/scsi_lib.c:976
#5  0xffffffff817482ed in blk_complete_reqs (list=<optimized out>) at block/blk-mq.c:1132
#6  0xffffffff822a98d4 in __do_softirq () at kernel/softirq.c:571
#7  0xffffffff811496e6 in invoke_softirq () at kernel/softirq.c:445
#8  __irq_exit_rcu () at kernel/softirq.c:650
#9  0xffffffff81149eee in irq_exit_rcu () at kernel/softirq.c:662
#10 0xffffffff82290dd4 in common_interrupt (regs=0xffffc900000e7e38, error_code=<optimized out>) at arch/x86/kernel/irq.c:240
Backtrace stopped: Cannot access memory at address 0xffffc90000349010