本篇文章主要是自己的学习笔记,主要内容是分析linux系统中设备的Suspend和Resume流程,用到的内核版本为 linux-4.14。
目录
1、Linux 内核的Suspend方法
在 Linux 内核中有三种 Suspend 的方法,分别是 Freeze、Standby、Suspend to RAM,在用户空间向 /sys/power/state 文件写入 "freeze"、"standby"、"mem" 就可以触发相应的 Suspend,如下所示。关于这几种 Suspend 的区别,如果大家感兴趣可以自行去查阅资料,这里就不再展开了。
# Freeze
echo "freeze" > /sys/power/state
# Standby
echo "standby" > /sys/power/state
# Suspend to RAM
echo "mem" > /sys/power/state
当执行上面命令会通过 sysfs 陷入到内核,并触发 Suspend ,相应的处理代码如下:
/*
 * state_store - Handle a write to the "state" power attribute.
 * @kobj: Unused here.
 * @attr: Unused here.
 * @buf:  Text written by user space; decoded by decode_state().
 * @n:    Number of bytes in @buf.
 *
 * Holds the autosleep lock for the whole request. Returns @n on success,
 * otherwise a negative errno: the lock error, -EBUSY while autosleep is
 * active, -EINVAL for an unrecognised state, or whatever pm_suspend() /
 * hibernate() reported.
 */
static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
			   const char *buf, size_t n)
{
	suspend_state_t target;
	int ret;

	ret = pm_autosleep_lock();
	if (ret)
		return ret;

	if (pm_autosleep_state() > PM_SUSPEND_ON) {
		/* Autosleep owns the sleep decision; reject manual requests. */
		ret = -EBUSY;
	} else {
		target = decode_state(buf, n);
		if (target == PM_SUSPEND_MAX) {
			ret = hibernate();
		} else if (target < PM_SUSPEND_MAX) {
			/* "mem" is redirected to the currently selected variant. */
			if (target == PM_SUSPEND_MEM)
				target = mem_sleep_current;
			ret = pm_suspend(target);
		} else {
			ret = -EINVAL;
		}
	}

	pm_autosleep_unlock();
	return ret ? ret : n;
}
decode_state 函数主要功能是 根据输入的字符串进行相应的匹配,返回值 state 的定义如下:
/*
 * System sleep state value. state_store() compares the result of
 * decode_state() against the constants below.
 */
typedef int __bitwise suspend_state_t;
/* pm_suspend() rejects any state <= PM_SUSPEND_ON. */
#define PM_SUSPEND_ON ((__force suspend_state_t) 0)
/* enter_state() and sleep_state_supported() special-case this state. */
#define PM_SUSPEND_TO_IDLE ((__force suspend_state_t) 1)
#define PM_SUSPEND_STANDBY ((__force suspend_state_t) 2)
/* state_store() replaces this value with mem_sleep_current before suspending. */
#define PM_SUSPEND_MEM ((__force suspend_state_t) 3)
/* Lowest state defined in this list that pm_suspend() accepts. */
#define PM_SUSPEND_MIN PM_SUSPEND_TO_IDLE
/*
 * Exclusive upper bound for pm_suspend(); state_store() treats a decoded
 * value equal to it as a hibernation request.
 */
#define PM_SUSPEND_MAX ((__force suspend_state_t) 4)
如果 state 满足相关条件就会进入 pm_suspend 函数,该函数定义如下:
/*
 * pm_suspend - Validate @state, enter it and account for the outcome.
 * @state: Requested sleep state; must lie strictly between PM_SUSPEND_ON
 *         and PM_SUSPEND_MAX.
 *
 * Returns -EINVAL for an out-of-range state, otherwise the result of
 * enter_state(). Success and failure are counted in suspend_stats.
 */
int pm_suspend(suspend_state_t state)
{
	int ret;

	if (!(state > PM_SUSPEND_ON && state < PM_SUSPEND_MAX))
		return -EINVAL;

	pr_info("suspend entry (%s)\n", mem_sleep_labels[state]);

	ret = enter_state(state);
	if (!ret) {
		suspend_stats.success++;
	} else {
		suspend_stats.fail++;
		dpm_save_failed_errno(ret);
	}

	pr_info("suspend exit\n");
	return ret;
}
然后再进入 enter_state 函数,定义如下:
/*
 * enter_state - Common path for entering a system sleep state.
 * @state: Sleep state to enter.
 *
 * Checks @state, takes pm_mutex, syncs filesystems (unless
 * CONFIG_SUSPEND_SKIP_SYNC is set), then runs suspend_prepare() followed by
 * suspend_devices_and_enter(). Returns -EAGAIN or -EINVAL from the early
 * checks, -EBUSY if pm_mutex cannot be taken, otherwise the result of
 * suspend_prepare() / suspend_devices_and_enter().
 */
static int enter_state(suspend_state_t state)
{
int error;
trace_suspend_resume(TPS("suspend_enter"), state, true);
/* Suspend-to-idle bypasses valid_state(); every other state must pass it. */
if (state == PM_SUSPEND_TO_IDLE) {
#ifdef CONFIG_PM_DEBUG
if (pm_test_level != TEST_NONE && pm_test_level <= TEST_CPUS) {
pr_warn("Unsupported test mode for suspend to idle, please choose none/freezer/devices/platform.\n");
return -EAGAIN;
}
#endif
} else if (!valid_state(state)) {
return -EINVAL;
}
/* Non-blocking: a transition already in progress makes this call fail. */
if (!mutex_trylock(&pm_mutex))
return -EBUSY;
if (state == PM_SUSPEND_TO_IDLE)
s2idle_begin();
#ifndef CONFIG_SUSPEND_SKIP_SYNC
trace_suspend_resume(TPS("sync_filesystems"), 0, true);
pr_info("Syncing filesystems ... ");
sys_sync();
pr_cont("done.\n");
trace_suspend_resume(TPS("sync_filesystems"), 0, false);
#endif
pm_pr_dbg("Preparing system for sleep (%s)\n", mem_sleep_labels[state]);
pm_suspend_clear_flags();
error = suspend_prepare(state);
/* suspend_prepare() undoes its own work on failure, so only unlock. */
if (error)
goto Unlock;
/* Freezer test level: stop here and undo the preparation. */
if (suspend_test(TEST_FREEZER))
goto Finish;
trace_suspend_resume(TPS("suspend_enter"), state, false);
pm_pr_dbg("Suspending system (%s)\n", mem_sleep_labels[state]);
pm_restrict_gfp_mask();
error = suspend_devices_and_enter(state);
pm_restore_gfp_mask();
Finish:
events_check_enabled = false;
pm_pr_dbg("Finishing wakeup.\n");
suspend_finish();
Unlock:
mutex_unlock(&pm_mutex);
return error;
}
(1)valid_state 函数主要是用来检查平台是否支持该电源状态,该函数的定义如下:
/*
 * valid_state - Ask the platform whether it can enter @state.
 * @state: Sleep state to check.
 *
 * PM_SUSPEND_STANDBY and PM_SUSPEND_MEM depend on low-level support, so the
 * decision is delegated to the platform's ->valid() callback. Without
 * suspend_ops or without a ->valid() callback no state is considered valid.
 */
static bool valid_state(suspend_state_t state)
{
	if (!suspend_ops || !suspend_ops->valid)
		return false;

	return suspend_ops->valid(state);
}
通过注释也能够大概知道该函数的作用,当 state 等于 standby 或者 mem时,则需要调用suspend_ops 中的 valid 回调,通过底层平台代码判断是否支持,关于 valid 回调的实现后续会介绍。
(2)suspend_prepare 函数主要进行 suspend 前的准备,比如 switch console 和 thread freezing,如果失败,则终止 suspend。函数定义如下:
/**
 * suspend_prepare - Prepare for entering system sleep state.
 * @state: System sleep state about to be entered.
 *
 * Common code run for every system sleep state that can be entered (except for
 * hibernation). Run suspend notifiers, allocate the "suspend" console and
 * freeze processes.
 *
 * Return: 0 on success, -EPERM if @state is not supported, otherwise the error
 * from the notifier chain or from freezing processes. On those later failures
 * the PM_POST_SUSPEND notification is sent and the console is restored.
 */
static int suspend_prepare(suspend_state_t state)
{
int error, nr_calls = 0;
if (!sleep_state_supported(state))
return -EPERM;
pm_prepare_console();
error = __pm_notifier_call_chain(PM_SUSPEND_PREPARE, -1, &nr_calls);
if (error) {
/* Presumably excludes the notifier that failed from the rollback below. */
nr_calls--;
goto Finish;
}
trace_suspend_resume(TPS("freeze_processes"), 0, true);
error = suspend_freeze_processes();
trace_suspend_resume(TPS("freeze_processes"), 0, false);
if (!error)
return 0;
/* Freezing failed: record it, then fall through to the rollback. */
suspend_stats.failed_freeze++;
dpm_save_failed_step(SUSPEND_FREEZE);
Finish:
__pm_notifier_call_chain(PM_POST_SUSPEND, nr_calls, NULL);
pm_restore_console();
return error;
}
sleep_state_supported : 检查suspend_ops是否有提供.enter回调,该回调会在后面使用到。
/*
 * sleep_state_supported - Can @state be entered at all?
 *
 * Suspend-to-idle is always supported; any other state requires the platform
 * to have registered suspend_ops with an ->enter() callback.
 */
static bool sleep_state_supported(suspend_state_t state)
{
	if (state == PM_SUSPEND_TO_IDLE)
		return true;

	return suspend_ops && suspend_ops->enter;
}
pm_prepare_console :将当前console切换到一个虚拟console。
__pm_notifier_call_chain :发送开始 suspend 的消息。
suspend_freeze_processes :freeze用户空间进程和一些内核线程。
(3)suspend_devices_and_enter 函数的实现如下:
/**
 * suspend_devices_and_enter - Suspend devices and enter system sleep state.
 * @state: System sleep state to enter.
 *
 * Return: -ENOSYS if @state is not supported, otherwise the error (or 0) from
 * platform_suspend_begin(), dpm_suspend_start() or suspend_enter().
 */
int suspend_devices_and_enter(suspend_state_t state)
{
int error;
bool wakeup = false;
if (!sleep_state_supported(state))
return -ENOSYS;
pm_suspend_target_state = state;
error = platform_suspend_begin(state);
/* Nothing has been suspended yet, so skip straight to the platform end hook. */
if (error)
goto Close;
suspend_console();
suspend_test_start();
error = dpm_suspend_start(PMSG_SUSPEND);
if (error) {
pr_err("Some devices failed to suspend, or early wake event detected\n");
goto Recover_platform;
}
suspend_test_finish("suspend devices");
/* Devices test level: stop after device suspend and unwind. */
if (suspend_test(TEST_DEVICES))
goto Recover_platform;
/*
 * Re-enter the sleep state for as long as suspend_enter() succeeds, no
 * wakeup was reported and the platform asks to suspend again.
 */
do {
error = suspend_enter(state, &wakeup);
} while (!error && !wakeup && platform_suspend_again(state));
Resume_devices:
suspend_test_start();
dpm_resume_end(PMSG_RESUME);
suspend_test_finish("resume devices");
trace_suspend_resume(TPS("resume_console"), state, true);
resume_console();
trace_suspend_resume(TPS("resume_console"), state, false);
Close:
platform_resume_end(state);
pm_suspend_target_state = PM_SUSPEND_ON;
return error;
/* Reached only via goto: let the platform recover, then resume devices. */
Recover_platform:
platform_recover(state);
goto Resume_devices;
}
sleep_state_supported : 再次检查suspend_ops是否有提供.enter回调。
platform_suspend_begin :如果 suspend_ops 提供了 begin 回调则调用它,通知平台代码,让其作相应的处理。
suspend_console :挂起console。
suspend_test_start :记录系统挂起的开始时间点。
dpm_suspend_start:调用所有设备的->prepare和->suspend回调函数
函数的实现如下:
/**
 * dpm_suspend_start - Run the "prepare" and "suspend" device phases.
 * @state: PM transition of the system being carried out.
 *
 * Calls dpm_prepare() and, only if that succeeds, dpm_suspend(). A prepare
 * failure is counted in suspend_stats and recorded as the failed step.
 *
 * Return: 0 on success, otherwise the error from the phase that failed.
 */
int dpm_suspend_start(pm_message_t state)
{
	int ret = dpm_prepare(state);

	if (!ret)
		return dpm_suspend(state);

	suspend_stats.failed_prepare++;
	dpm_save_failed_step(SUSPEND_PREPARE);
	return ret;
}
dpm_prepare :对非系统的设备进行 prepare(调用各设备的 ->prepare 回调),在这个函数之后,设备就不应该再注册新的子设备了。
dpm_suspend :调用所有非系统设备的 suspend 函数,设备将停止操作。
dpm_suspend 函数的调用流程如下:
int dpm_suspend(pm_message_t state)
==> error = device_suspend(dev);
==> return __device_suspend(dev, pm_transition, false);
2、__device_suspend 函数
dpm_suspend函数最后是调用到了__device_suspend 函数,该函数的实现如下:
/**
 * __device_suspend - Execute "suspend" callbacks for given device.
 * @dev: Device to handle.
 * @state: PM transition of the system being carried out.
 * @async: If true, the device is being suspended asynchronously.
 *
 * Picks one callback for @dev (power domain, then device type, then class,
 * then bus, with the driver as fallback when none was found) and runs it.
 *
 * Return: 0 on success or when the device is skipped, otherwise the error
 * returned by the callback. Any error is also stored in async_error.
 */
static int __device_suspend(struct device *dev, pm_message_t state, bool async)
{
pm_callback_t callback = NULL;
const char *info = NULL;
int error = 0;
DECLARE_DPM_WATCHDOG_ON_STACK(wd);
TRACE_DEVICE(dev);
TRACE_SUSPEND(0);
/* NOTE(review): presumably waits for devices that depend on @dev - confirm. */
dpm_wait_for_subordinate(dev, async);
/* An error is already recorded in async_error: skip this device. */
if (async_error) {
dev->power.direct_complete = false;
goto Complete;
}
/*
 * If a device configured to wake up the system from sleep states
 * has been suspended at run time and there's a resume request pending
 * for it, this is equivalent to the device signaling wakeup, so the
 * system suspend operation should be aborted.
 */
if (pm_runtime_barrier(dev) && device_may_wakeup(dev))
pm_wakeup_event(dev, 0);
/* A pending wakeup aborts the suspend with -EBUSY. */
if (pm_wakeup_pending()) {
dev->power.direct_complete = false;
async_error = -EBUSY;
goto Complete;
}
/* Devices flagged syscore are not handled here. */
if (dev->power.syscore)
goto Complete;
/* Avoid direct_complete to let wakeup_path propagate. */
if (device_may_wakeup(dev) || dev->power.wakeup_path)
dev->power.direct_complete = false;
/*
 * direct_complete: if the device is still runtime-suspended once runtime PM
 * has been disabled, no callback is run for it at all.
 */
if (dev->power.direct_complete) {
if (pm_runtime_status_suspended(dev)) {
pm_runtime_disable(dev);
if (pm_runtime_status_suspended(dev))
goto Complete;
pm_runtime_enable(dev);
}
dev->power.direct_complete = false;
}
dpm_watchdog_set(&wd, dev);
device_lock(dev);
/* Callback lookup, highest priority first; the first match wins. */
if (dev->pm_domain) {
info = "power domain ";
callback = pm_op(&dev->pm_domain->ops, state);
goto Run;
}
if (dev->type && dev->type->pm) {
info = "type ";
callback = pm_op(dev->type->pm, state);
goto Run;
}
if (dev->class) {
if (dev->class->pm) {
info = "class ";
callback = pm_op(dev->class->pm, state);
goto Run;
} else if (dev->class->suspend) {
/* Legacy callback: run directly, bypassing the driver fallback. */
pm_dev_dbg(dev, state, "legacy class ");
error = legacy_suspend(dev, state, dev->class->suspend,
"legacy class ");
goto End;
}
}
if (dev->bus) {
if (dev->bus->pm) {
info = "bus ";
callback = pm_op(dev->bus->pm, state);
} else if (dev->bus->suspend) {
/* Legacy callback: run directly, bypassing the driver fallback. */
pm_dev_dbg(dev, state, "legacy bus ");
error = legacy_suspend(dev, state, dev->bus->suspend,
"legacy bus ");
goto End;
}
}
Run:
/* Fall back to the driver's own ops when no callback was found above. */
if (!callback && dev->driver && dev->driver->pm) {
info = "driver ";
callback = pm_op(dev->driver->pm, state);
}
#ifdef CONFIG_MTK_RAM_CONSOLE
/* Record the callback address, separately for async and sync suspends. */
if (async)
aee_rr_rec_last_async_func((unsigned long int)callback);
else
aee_rr_rec_last_sync_func((unsigned long int)callback);
#endif
error = dpm_run_callback(callback, dev, state, info);
End:
if (!error) {
struct device *parent = dev->parent;
dev->power.is_suspended = true;
if (parent) {
spin_lock_irq(&parent->power.lock);
/* The parent can no longer skip its own callbacks. */
dev->parent->power.direct_complete = false;
/* Propagate wakeup_path upwards unless the parent ignores children. */
if (dev->power.wakeup_path
&& !dev->parent->power.ignore_children)
dev->parent->power.wakeup_path = true;
spin_unlock_irq(&parent->power.lock);
}
dpm_clear_suppliers_direct_complete(dev);
} else {
log_suspend_abort_reason("Callback failed on %s in %pS returned %d",
dev_name(dev), callback, error);
}
device_unlock(dev);
dpm_watchdog_clear(&wd);
Complete:
/* Publish the error, then signal this device's completion. */
if (error)
async_error = error;
complete_all(&dev->power.completion);
TRACE_SUSPEND(error);
return error;
}
通过注释就可以看出这个函数是执行系统中给定设备的 suspend 回调函数。
在旧版本的 linux 内核中,这些 callbacks 是分散放在设备模型的各个数据结构中的,比如 struct bus_type、struct device/driver、struct class 等这些数据结构中都会有 suspend/resume 函数的身影,但这样做既不具备良好的封装性,也不够实用。
后来就将这些Callbacks封装为一个统一的数据结构,也就是 struct dev_pm_ops ,上层的数据结构只需要包含这个结构即可。该结构体的定义如下:
struct dev_pm_ops {
int (*prepare)(struct device *dev);
void (*complete)(struct device *dev);
int (*suspend)(struct device *dev);
int (*resume)(struct device *dev);
int (*freeze)(struct device *dev);
int (*thaw)(struct device *dev);
int (*poweroff)(struct device *dev);
int (*restore)(struct device *dev);
int (*suspend_late)(struct device *dev);
int (*resume_early)(struct device *dev);
int (*freeze_late)(struct device *dev);
int (*thaw_early)(struct device *dev);
int (*poweroff_late)(struct device *dev);
int (*restore_early)(struct device *dev);
int (*suspend_noirq)(struct device *dev);
int (*resume_noirq)(struct device *dev);
int (*freeze_noirq)(struct device *dev);
int (*thaw_noirq)(struct device *dev);
int (*poweroff_noirq)(struct device *dev);
int (*restore_noirq)(struct device *dev);
int (*runtime_suspend)(struct device *dev);
int (*runtime_resume)(struct device *dev);
int (*runtime_idle)(struct device *dev);
};
这里面的 callbacks 都是和具体设备挂钩的,比如 suspend / resume ,callbacks 的实现和具体的设备有很大关系,这就需要工程师在设计Driver的时候,知道这些 callbacks 的使用场景,根据具体的需求进行分析。
回到 __device_suspend 函数中,callback = pm_op() 就是用来获取设备相应的回调函数,并保存在 callback 变量中。回调的查找优先级如下(前一项存在时就不再查找后面的;如果最终没有得到回调,则使用 dev->driver->pm 中的回调):
-> struct dev_pm_domain *pm_domain
-> struct device_type *type;
-> struct class *class;
-> struct bus_type *bus;
3、pm_op 函数
通过上面的分析可以知道__device_suspend函数最后是调用了pm_op函数,函数实现如下:
/**
 * pm_op - Select the callback in @ops that matches a PM event.
 * @ops: PM operations to choose from.
 * @state: PM transition of the system being carried out.
 *
 * Return: the matching callback, or NULL when the event is unknown or the
 * corresponding support (CONFIG_SUSPEND / CONFIG_HIBERNATE_CALLBACKS) is not
 * compiled in.
 */
static pm_callback_t pm_op(const struct dev_pm_ops *ops, pm_message_t state)
{
	pm_callback_t cb = NULL;

	switch (state.event) {
#ifdef CONFIG_SUSPEND
	case PM_EVENT_SUSPEND:
		cb = ops->suspend;
		break;
	case PM_EVENT_RESUME:
		cb = ops->resume;
		break;
#endif /* CONFIG_SUSPEND */
#ifdef CONFIG_HIBERNATE_CALLBACKS
	case PM_EVENT_FREEZE:
	case PM_EVENT_QUIESCE:
		cb = ops->freeze;
		break;
	case PM_EVENT_HIBERNATE:
		cb = ops->poweroff;
		break;
	case PM_EVENT_THAW:
	case PM_EVENT_RECOVER:
		cb = ops->thaw;
		break;
	case PM_EVENT_RESTORE:
		cb = ops->restore;
		break;
#endif /* CONFIG_HIBERNATE_CALLBACKS */
	default:
		break;
	}

	return cb;
}
这样便得到设备的 suspend callback 函数,然后继续往下可以看到 dpm_run_callback(callback, dev, state, info) 函数,该函数就是运行前面获取的 callback,函数的定义如下:
/*
 * dpm_run_callback - Invoke a device PM callback, if there is one.
 * @cb:    Callback to run; NULL means there is nothing to do.
 * @dev:   Device passed to the callback.
 * @state: PM transition of the system being carried out.
 * @info:  Label describing where the callback came from. It is const because
 *         __device_suspend() passes string literals through a const char *.
 *
 * Returns 0 when @cb is NULL, otherwise the callback's return value.
 * The "......" lines mark code left out of this excerpt.
 */
static int dpm_run_callback(pm_callback_t cb, struct device *dev,
			    pm_message_t state, const char *info)
{
	ktime_t calltime;
	int error;

	if (!cb)
		return 0;
	......
	error = cb(dev);
	......
	return error;
}
这样就能够执行到 driver 中的 suspend 回调函数。
系统在 suspend/resume 的过程中,会依次调用 prepare —> suspend —> suspend_late —> suspend_noirq —> wakeup —> resume_noirq —> resume_early —> resume —> complete。目前只是调用到了 suspend 回调,其它阶段的调用流程会在后面体现。
4、suspend_enter 函数
好了,接下来分析其它代码,现在回到 suspend_devices_and_enter 函数中,dpm_suspend_start 函数已经分析完了,现在分析 suspend_enter 函数,该函数的定义如下:
/**
 * suspend_enter - Make the system enter the given sleep state.
 * @state: System sleep state to enter.
 * @wakeup: Returns information that the sleep state should not be re-entered.
 *
 * This function should be called after devices have been suspended.
 *
 * Each step that fails jumps to the label that undoes only the steps already
 * completed; the labels at the bottom run in the reverse order of the suspend
 * steps above them.
 *
 * Return: 0 on success, -EBUSY if a wakeup was pending right after
 * syscore_suspend(), otherwise the error of the step that failed.
 */
static int suspend_enter(suspend_state_t state, bool *wakeup)
{
int error, last_dev;
error = platform_suspend_prepare(state);
if (error)
goto Platform_finish;
error = dpm_suspend_late(PMSG_SUSPEND);
if (error) {
/* Presumably the slot just before last_failed_dev in the failed_devs[] ring. */
last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1;
last_dev %= REC_FAILED_NUM;
pr_err("late suspend of devices failed\n");
log_suspend_abort_reason("late suspend of %s device failed",
suspend_stats.failed_devs[last_dev]);
goto Platform_finish;
}
error = platform_suspend_prepare_late(state);
if (error)
goto Devices_early_resume;
/* Suspend-to-idle stops here (unless testing at platform level). */
if (state == PM_SUSPEND_TO_IDLE && pm_test_level != TEST_PLATFORM) {
s2idle_loop();
goto Platform_early_resume;
}
error = dpm_suspend_noirq(PMSG_SUSPEND);
if (error) {
/* Same lookup as for the late-suspend failure above. */
last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1;
last_dev %= REC_FAILED_NUM;
pr_err("noirq suspend of devices failed\n");
log_suspend_abort_reason("noirq suspend of %s device failed",
suspend_stats.failed_devs[last_dev]);
goto Platform_early_resume;
}
error = platform_suspend_prepare_noirq(state);
if (error)
goto Platform_wake;
if (suspend_test(TEST_PLATFORM))
goto Platform_wake;
error = disable_nonboot_cpus();
if (error || suspend_test(TEST_CPUS)) {
log_suspend_abort_reason("Disabling non-boot cpus failed");
goto Enable_cpus;
}
arch_suspend_disable_irqs();
BUG_ON(!irqs_disabled());
error = syscore_suspend();
if (!error) {
/* Last check for a wakeup before handing control to the platform. */
*wakeup = pm_wakeup_pending();
if (!(suspend_test(TEST_CORE) || *wakeup)) {
trace_suspend_resume(TPS("machine_suspend"),
state, true);
/* Platform hook that enters the sleep state. */
error = suspend_ops->enter(state);
trace_suspend_resume(TPS("machine_suspend"),
state, false);
} else if (*wakeup) {
error = -EBUSY;
}
syscore_resume();
}
arch_suspend_enable_irqs();
BUG_ON(irqs_disabled());
Enable_cpus:
enable_nonboot_cpus();
Platform_wake:
platform_resume_noirq(state);
dpm_resume_noirq(PMSG_RESUME);
Platform_early_resume:
platform_resume_early(state);
Devices_early_resume:
dpm_resume_early(PMSG_RESUME);
Platform_finish:
platform_resume_finish(state);
return error;
}
platform_suspend_prepare : 如果平台提供了 suspend_ops->prepare() 回调则调用它,让平台为进入睡眠状态做准备。
dpm_suspend_late:执行所有设备的 ->suspend_late 回调,即设备挂起的 late 阶段。
platform_suspend_prepare_late:准备进入睡眠状态的延迟阶段,执行平台相关的准备操作。
dpm_suspend_noirq:执行所有设备的 ->suspend_noirq 回调,即在设备中断已被关闭的 noirq 阶段挂起设备。
platform_suspend_prepare_noirq:准备进入睡眠状态的无中断阶段,执行平台相关的准备操作。
disable_nonboot_cpus:关闭所有非 boot CPU 。
arch_suspend_disable_irqs:关闭全局中断
syscore_suspend:执行系统核心的挂起操作。
pm_wakeup_pending:检查在这段时间内是否有唤醒事件的发生,如果有就要终止suspend
如果前面阶段都一切顺利,则调用suspend_ops->enter(state)回调进行 suspend ,这时系统已经睡过去了,完成系统的suspend。
5、resume流程
系统的 resume 过程和 suspend 的流程恰好相反,这里就不展开分析了:
syscore_resume()
:恢复系统核心。
arch_suspend_enable_irqs()
:使能中断。
enable_nonboot_cpus()
:使能非启动的 CPU。
platform_resume_noirq(state)
:在没有中断的情况下恢复平台。
dpm_resume_noirq(PMSG_RESUME)
:在没有中断的情况下恢复设备。
platform_resume_early(state)
:早期恢复平台。
dpm_resume_early(PMSG_RESUME)
:早期恢复设备。
platform_resume_finish(state)
:完成平台的恢复操作。
整个系统的 suspend/resume 流程就分析到这里,如果大家想了解 suspend_ops->enter 函数到底做了什么,可以看我的另外一篇文章。