Bootstrap

Linux: 以太网 PHY 驱动简析

1. 前言

限于作者能力水平,本文可能存在谬误,因此而给读者带来的损失,作者不做任何承诺。

2. 背景

本文基于 linux-4.14.132 内核代码进行分析。

3. 硬件拓扑

在这里插入图片描述
上图来自 瑞昱半导体 (RealTek) 的 RTL8201F 系列网卡 PHY 芯片手册。按OSI 7层网络模型划分,网卡PHY 芯片(图中的RTL8201F)位于物理层,对应的软件层就是本文讨论的 PHY 驱动层;而 MAC 位于 数据链路层,也是通常软件上所说的网卡驱动层,它不是本文的重点,不做展开。另外,可通过 MDIO 接口对 PHY 芯片进行配置(如PHY芯片寄存器读写),而 PHY 和 MAC 通过 MII/RMII 进行数据传输
值得一提的是,Linux 的网络子系统,只取了 OSI 7 层网络模型的前 4 层:物理层、数据链路层、网络层(IP 协议等)、传输层(TCP/UDP 协议等)。在内核代码注释里,我们经常看到用 L1 指代物理层(如 PHY),L2 指代数据链路层(如 MAC),L3 指代网络层。

4. 以太网卡 PHY 驱动实现

在现在的嵌入式产品中,以太网卡的典型应用是:在 SoC 中集成一个 MAC,而在外围扩展电路中,加上一个以太网 PHY 芯片,这样和 RJ45 以及 网线一起,组成一个完整的以太网通信接口。想集成第二个以太网卡的产品,通常是加入一个 USB 以太网芯片(包含MAC),然后再加入一路以太网 PHY ,和 额外的 RJ45 和 网线一起,组成系统中的第二个以太网通信接口

4.1 MDIO 总线对象的创建和注册

Linux系统中的网卡 PHY ,统一通过 MDIO 总线管理,看一下拓扑结构:

MDIO BUS ----- PHY 0
            |
            |- PHY 1
            |
            |- ...
            |
             - PHY 31

PHY 通过 MDIO 接口挂接在 MDIO BUS 上(通常是 MAC 导出的 MDIO BUS),每个挂接到 MDIO BUS 上的 PHY 设备都有一个唯一地址,PHY 设备地址用 5 bit 来描述,所以一个 MDIO BUS 上最多挂 2 ^ 5 = 32 个 PHY 。需要注意的是,对于不同类型的以太网 PHY 设备,区间 [0,31] 的每一个地址不一定都是可用的,譬如地址 31 是 DP83640 的广播地址。广播地址对于不同的芯片可能都是不同的,一般是从 0 ~ 31 中选一个作为广播地址。PHY 设备的地址可通过芯片数据手册来确认,该地址应该配置到 PHY 设备相关的 DTS 中。
在 Linux 中, MDIO BUS 用数据结构 struct mii_bus 抽象,看一下它的具体实现:

/*
 * MDIO bus object. Holds the bus accessor callbacks, the per-address
 * table of attached MDIO devices and the per-address interrupt settings.
 */
struct mii_bus {
	struct module *owner;
	const char *name;
	char id[MII_BUS_ID_SIZE];
	void *priv;
	/* Bus accessors: addr selects the device, regnum the register */
	int (*read)(struct mii_bus *bus, int addr, int regnum);
	int (*write)(struct mii_bus *bus, int addr, int regnum, u16 val);
	int (*reset)(struct mii_bus *bus);

	/*
	 * A lock to ensure that only one thing can read/write
	 * the MDIO bus at a time
	 */
	struct mutex mdio_lock;

	struct device *parent;
	/* Lifecycle state of this bus object */
	enum {
		MDIOBUS_ALLOCATED = 1,
		MDIOBUS_REGISTERED,
		MDIOBUS_UNREGISTERED,
		MDIOBUS_RELEASED,
	} state;
	struct device dev;

	/* list of all PHYs on bus */
	struct mdio_device *mdio_map[PHY_MAX_ADDR];

	/* PHY addresses to be ignored when probing */
	u32 phy_mask;

	/* PHY addresses to ignore the TA/read failure */
	u32 phy_ignore_ta_mask;

	/*
	 * An array of interrupts, each PHY's interrupt at the index
	 * matching its address
	 */
	int irq[PHY_MAX_ADDR];

	/* GPIO reset pulse width in microseconds */
	int reset_delay_us;
	/* RESET GPIO descriptor pointer */
	struct gpio_desc *reset_gpiod;
};

MDIO 总线对象既可由 MAC 层驱动(也就是我们通常所说的网卡驱动)创建,也可单独创建注册。我们以一个实际的例子来分析 MDIO 总线 和 PHY 驱动的具体实现,先看 SoC 内置以太网 MAC 设备的 DTS 配置:

emac: ethernet@1c30000 {
	compatible = "allwinner,sun8i-h3-emac";
	...
	local-mac-address = [00 00 00 00 00 00];
	...
	/* NOTE(review): the only PHY node shown below is labelled int_mii_phy; confirm where ext_rgmii_phy is defined */
	phy-handle = <&ext_rgmii_phy>;
	phy-mode = "rgmii"; /* use 1000 Mbps mode */

	/* MDIO configuration of the MAC */
	mdio: mdio {
		#address-cells = <1>;
		#size-cells = <0>;
		...
		/* configuration of a PHY attached to the MAC's MDIO bus */
		int_mii_phy: ethernet-phy@1 {
			compatible = "ethernet-phy-ieee802.3-c22";
			...
		};
	};
};

来看 MDIO 总线对象的注册细节,从 MAC 驱动(也即通常说的网卡驱动)开始:

/* OF match table: binds the "allwinner,sun8i-h3-emac" compatible to this MAC driver. */
static const struct of_device_id sun8i_dwmac_match[] = {
	{ .compatible = "allwinner,sun8i-h3-emac",
		.data = &emac_variant_h3 },
	...
	{ }
};
MODULE_DEVICE_TABLE(of, sun8i_dwmac_match); 

/* Platform driver for the MAC; probing enters through sun8i_dwmac_probe(). */
static struct platform_driver sun8i_dwmac_driver = {
	.probe  = sun8i_dwmac_probe,
	...
	.driver = {
		.name           = "dwmac-sun8i",
		...
		.of_match_table = sun8i_dwmac_match,
	},
};
module_platform_driver(sun8i_dwmac_driver);

/*
 * Probe path of the emac driver (excerpt): hands over to
 * stmmac_dvr_probe() and returns its result.
 */
static int sun8i_dwmac_probe(struct platform_device *pdev)
{
	int ret;
	
	...
	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
	...
	return ret;
}

/*
 * Common stmmac probe (excerpt): allocates the net_device, installs the
 * ethtool and netdev operations, registers the MAC's MDIO bus when the
 * PCS is not RGMII/TBI/RTBI, and finally registers the net_device.
 * Returns the result of register_netdev().
 */
int stmmac_dvr_probe(struct device *device,
		     struct plat_stmmacenet_data *plat_dat,
		     struct stmmac_resources *res)
{
	struct net_device *ndev = NULL;

	/* Allocate the network device object */
	ndev = alloc_etherdev_mqs(sizeof(struct stmmac_priv),
				  MTL_MAX_TX_QUEUES,
				  MTL_MAX_RX_QUEUES);
	
	...
	
	stmmac_set_ethtool_ops(ndev); /* support for the user-space ethtool utility */
	...
	
	/* Set the MAC address */
	if (res->mac)
		memcpy(priv->dev->dev_addr, res->mac, ETH_ALEN);

	...

	ndev->netdev_ops = &stmmac_netdev_ops; /* install the network device operations */

	...

	if (priv->hw->pcs != STMMAC_PCS_RGMII  &&
	    priv->hw->pcs != STMMAC_PCS_TBI &&
	    priv->hw->pcs != STMMAC_PCS_RTBI) {
		/* MDIO bus Registration */
		ret = stmmac_mdio_register(ndev); /* register the MAC's MDIO bus object */
		...
	}

	ret = register_netdev(ndev); /* register the network device object */

	return ret;
}

我们这里实际已经给出了一个网卡驱动的框架,但我们这里不关注网卡驱动,MDIO 总线对象的注册过程,是我们关注的重点:

/*
 * Create and register the MDIO bus of the MAC (excerpt). When the
 * platform data carries an MDIO device-tree node, registration goes
 * through of_mdiobus_register().
 * NOTE(review): as excerpted, 0 is returned regardless of err; the
 * elided lines presumably handle the failure - confirm.
 */
int stmmac_mdio_register(struct net_device *ndev)
{
	int err = 0;
	struct mii_bus *new_bus;
	...
	struct device_node *mdio_node = priv->plat->mdio_node;

	...
	new_bus = mdiobus_alloc(); /* allocate the MDIO bus object */
	...

	if (mdio_node) {
		...
		err = of_mdiobus_register(new_bus, mdio_node); /* register the MDIO bus object */
	}  else {
		...
	}

	...
	
	return 0;
}

/* Allocate an MDIO bus object without any extra private area (size 0). */
static inline struct mii_bus *mdiobus_alloc(void)
{
	return mdiobus_alloc_size(0);
}

/*
 * Allocate an MDIO bus object, optionally followed by a private area.
 *
 * @size: number of extra bytes to reserve for the caller's private
 *        data; 0 for none.
 *
 * When @size is non-zero the private area is placed after the bus
 * structure at a NETDEV_ALIGN-aligned offset and bus->priv points at it.
 * Every per-address interrupt slot starts out as PHY_POLL.
 *
 * Returns the zero-initialised bus in state MDIOBUS_ALLOCATED, or NULL
 * when the allocation fails.
 */
struct mii_bus *mdiobus_alloc_size(size_t size)
{
	struct mii_bus *bus;
	size_t aligned_size = ALIGN(sizeof(*bus), NETDEV_ALIGN);
	size_t alloc_size;
	int i;

	/* If we alloc extra space, it should be aligned */
	if (size)
		alloc_size = aligned_size + size;
	else
		alloc_size = sizeof(*bus);

	bus = kzalloc(alloc_size, GFP_KERNEL);
	if (!bus)
		return NULL;

	bus->state = MDIOBUS_ALLOCATED;
	if (size)
		bus->priv = (void *)bus + aligned_size;

	/* Initialise the interrupts to polling */
	for (i = 0; i < PHY_MAX_ADDR; i++)
		bus->irq[i] = PHY_POLL;

	return bus;
}

/* drivers/of/of_mdio.c */
/*
 * Register an MDIO bus object and the PHYs described under its
 * device-tree node (excerpt).
 */
int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
{
	int addr, rc;
	
	rc = mdiobus_register(mdio); /* __mdiobus_register(mdio, THIS_MODULE); */

	/* 
	 * After the bus itself is registered, the child nodes describing
	 * the PHYs attached to it are parsed and registered as well, e.g.:
	 * emac: ethernet@1c30000 {
	 * 		compatible = "allwinner,sun8i-h3-emac";
	 * 		...
	 * 		mdio: mdio {
	 * 			...
	 * 			// configuration of a PHY attached to the MAC's MDIO bus
	 *			int_mii_phy: ethernet-phy@1 {
	 *				compatible = "ethernet-phy-ieee802.3-c22";
	 *				...
	 *			};
	 * 		};
	 * };
	 */
	/* Loop over the child nodes and register a phy_device for each phy */
	for_each_available_child_of_node(np, child) {
		...
		if (of_mdiobus_child_is_phy(child))
			rc = of_mdiobus_register_phy(mdio, child, addr); // see of_mdiobus_register_phy()
		else
			...
		...
	}

	/* auto scan for PHYs with empty reg property */
	for_each_available_child_of_node(np, child) {
		/* Skip PHYs with reg property set */
		if (of_find_property(child, "reg", NULL))
			continue;
		
		for (addr = 0; addr < PHY_MAX_ADDR; addr++) {
			/* skip already registered PHYs */
			if (mdiobus_is_registered_device(mdio, addr))
				continue;
			
			/* be noisy to encourage people to set reg property */
			dev_info(&mdio->dev, "scan phy %s at address %i\n",
				 child->name, addr);
			
			if (of_mdiobus_child_is_phy(child)) {
				// see of_mdiobus_register_phy()
				rc = of_mdiobus_register_phy(mdio, child, addr);
				...
			}
		}
		...
	}
	...

	return 0;
}

/*
 * Register an MDIO bus with the driver core and scan it for devices
 * (excerpt). On the path shown the bus ends up in state
 * MDIOBUS_REGISTERED and 0 is returned.
 */
int __mdiobus_register(struct mii_bus *bus, struct module *owner)
{
	struct mdio_device *mdiodev;

	bus->owner = owner;
	bus->dev.parent = bus->parent;
	bus->dev.class = &mdio_bus_class;
	bus->dev.groups = NULL;
	dev_set_name(&bus->dev, "%s", bus->id);

	err = device_register(&bus->dev); /* register the MDIO bus device with the driver core */
	...

	/*
	 * Probe every address in [0, PHY_MAX_ADDR) that is not masked out
	 * by bus->phy_mask. mdiobus_scan() is called for each such
	 * address; for a device that is found it yields a
	 * struct phy_device.
	 */
	for (i = 0; i < PHY_MAX_ADDR; i++) {
		if ((bus->phy_mask & (1 << i)) == 0) {
			struct phy_device *phydev;

			/* scanning is detailed with get_phy_device() */
			phydev = mdiobus_scan(bus, i);
			...
		}
	}

	...

	bus->state = MDIOBUS_REGISTERED;
	pr_info("%s: probed\n", bus->name);
	return 0;
}

到此,已经完成 MAC 设备 MDIO 总线对象的 创建 和 注册,期间会伴随着挂接在 MDIO 总线上从设的扫描注册,其细节将在接下来的章节展开。MDIO 总线对象的 创建 和 注册,涉及到两个接口:

/* 
 * Interfaces for allocating and freeing MDIO bus objects.
 */
// include/linux/phy.h  
struct mii_bus *mdiobus_alloc_size(size_t);
static inline struct mii_bus *mdiobus_alloc(void)
{
	return mdiobus_alloc_size(0);
}

void mdiobus_free(struct mii_bus *bus);

struct mii_bus *devm_mdiobus_alloc_size(struct device *dev, int sizeof_priv);
static inline struct mii_bus *devm_mdiobus_alloc(struct device *dev)
{
	return devm_mdiobus_alloc_size(dev, 0);
}

void devm_mdiobus_free(struct device *dev, struct mii_bus *bus);

/* 
 * Interfaces for registering and unregistering MDIO bus objects.
 */
// include/linux/phy.h 
int __mdiobus_register(struct mii_bus *bus, struct module *owner);
#define mdiobus_register(bus) __mdiobus_register(bus, THIS_MODULE)

void mdiobus_unregister(struct mii_bus *bus);

/* Registration variant that additionally parses device-tree data */
// include/linux/of_mdio.h
extern int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np);

4.2 MDIO 总线从设的创建注册,以及驱动的注册和加载

MDIO 总线从设,具体到我们例子中,就是以太网的 PHY 设备。

4.2.1 以太网的 PHY 设备创建和注册

上面提到,MDIO 总线对象的创建和注册过程中,会伴随对总线上从设的扫描探测,并为探测到物理设备创建设备对象并注册的过程。我们在这里展开章节 4.1 未涉及到的前述细节:

/* drivers/of/of_mdio.c */
/*
 * Register an MDIO bus object (excerpt focused on PHY registration):
 * for every available child node the address is parsed and, when the
 * node describes a PHY, of_mdiobus_register_phy() is called.
 */
int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
{
	int addr, rc;
	
	rc = mdiobus_register(mdio); /* __mdiobus_register(mdio, THIS_MODULE); */

	/* 
	 * After the bus itself is registered, the child nodes describing
	 * the PHYs attached to it are parsed and registered as well, e.g.:
	 * emac: ethernet@1c30000 {
	 * 		compatible = "allwinner,sun8i-h3-emac";
	 * 		...
	 * 		mdio: mdio {
	 * 			...
	 * 			// configuration of a PHY attached to the MAC's MDIO bus
	 *			int_mii_phy: ethernet-phy@1 {
	 *				compatible = "ethernet-phy-ieee802.3-c22";
	 *				...
	 *			};
	 * 		};
	 * };
	 */
	/* Loop over the child nodes and register a phy_device for each phy */
	for_each_available_child_of_node(np, child) {
		addr = of_mdio_parse_addr(&mdio->dev, child);
		...
		if (of_mdiobus_child_is_phy(child))
			rc = of_mdiobus_register_phy(mdio, child, addr);
		else
			...
		...
	}

	...
	
	return 0;
}

/*
 * Decide whether a child node of the MDIO bus node describes a PHY
 * (excerpt). Only the "ethernet-phy-ieee802.3-c22" compatible check is
 * shown; the other checks are elided.
 */
static bool of_mdiobus_child_is_phy(struct device_node *child)
{
	...
	
	/* matches the DTS example used in this article */
	if (of_device_is_compatible(child, "ethernet-phy-ieee802.3-c22"))
		return true;

	...

	return false;
}

/*
 * Create and register the phy_device for one PHY child node at bus
 * address addr (excerpt).
 */
static int of_mdiobus_register_phy(struct mii_bus *mdio,
				    struct device_node *child, u32 addr)
{
	struct phy_device *phy;

	if (!is_c45 && !of_get_phy_id(child, &phy_id))
		...
	else
		/* probe the address and create a struct phy_device for the PHY found */
		phy = get_phy_device(mdio, addr, is_c45);
	
	...
	/* simplified by the article's author */
	phy->irq = mdio->irq[addr]; /* phy->irq = PHY_POLL; */

	...
	of_node_get(child);
	phy->mdio.dev.of_node = child;

	/* Register the PHY device on its MDIO bus and add it to the driver core.
	 * If a matching PHY driver is already registered this can trigger its probe.
	 */ 
	rc = phy_device_register(phy);
	...

	return 0;
}

/*
 * Probe bus address addr and, when a PHY answers, build its
 * struct phy_device.
 *
 * Returns the new device from phy_device_create(), ERR_PTR(r) when
 * reading the ID fails with r, or ERR_PTR(-ENODEV) when the low 29 bits
 * of the ID are all ones.
 */
struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45)
{
	const u32 absent_mask = 0x1fffffff;
	struct phy_c45_device_ids c45_ids = {0};
	u32 phy_id = 0;
	int err = get_phy_id(bus, addr, &phy_id, is_c45, &c45_ids);

	/* reading the identifier failed: nothing usable at this address */
	if (err)
		return ERR_PTR(err);

	/* an identifier that is mostly Fs means there is no device there */
	if ((phy_id & absent_mask) != absent_mask)
		return phy_device_create(bus, addr, phy_id, is_c45, &c45_ids);

	return ERR_PTR(-ENODEV);
}

/*
 * Read the PHY identifier at bus address addr.
 *
 * For a clause-45 device the work is delegated to get_phy_c45_ids().
 * Otherwise the two ID registers are read: MII_PHYSID1 supplies the
 * upper 16 bits of *phy_id and MII_PHYSID2 the lower 16 bits.
 *
 * Returns 0 on success, -EIO when either register read fails, or the
 * result of get_phy_c45_ids() for clause-45 devices.
 */
static int get_phy_id(struct mii_bus *bus, int addr, u32 *phy_id,
		      bool is_c45, struct phy_c45_device_ids *c45_ids)
{
	int phy_reg;

	if (is_c45)
		return get_phy_c45_ids(bus, addr, phy_id, c45_ids);

	/* Grab the bits from PHYIR1, and put them in the upper half */
	phy_reg = mdiobus_read(bus, addr, MII_PHYSID1);
	if (phy_reg < 0)
		return -EIO;

	/* shift as u32: shifting a signed int into the sign bit is undefined */
	*phy_id = (u32)(phy_reg & 0xffff) << 16;

	/* Grab the bits from PHYIR2, and put them in the lower half */
	phy_reg = mdiobus_read(bus, addr, MII_PHYSID2);
	if (phy_reg < 0)
		return -EIO;

	*phy_id |= (u32)(phy_reg & 0xffff);

	return 0;
}

/*
 * Allocate and initialise a PHY device object for bus address addr
 * (excerpt).
 *
 * The embedded mdio_device is wired to the bus and to the PHY-specific
 * callbacks, the state-machine work items are initialised and the
 * struct device is initialised (not yet added).
 *
 * Returns the new device, or ERR_PTR(-ENOMEM) when allocation fails.
 */
struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id,
				     bool is_c45,
				     struct phy_c45_device_ids *c45_ids)
{
	struct phy_device *dev;
	struct mdio_device *mdiodev;

	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
	/* dev is dereferenced right below, so bail out on allocation failure */
	if (!dev)
		return ERR_PTR(-ENOMEM);

	mdiodev = &dev->mdio;
	mdiodev->dev.release = phy_device_release;
	mdiodev->dev.parent = &bus->dev;
	mdiodev->dev.bus = &mdio_bus_type;
	mdiodev->bus = bus;
	/*
	 * struct phy_device {
	 * 		struct mdio_device mdio; // phy_device can be seen as a subclass of mdio_device
	 * 		...
	 * };
	 */
	mdiodev->pm_ops = MDIO_BUS_PHY_PM_OPS; /* devices on an MDIO bus need not all be PHYs; the following fields mark this one as a PHY */
	mdiodev->bus_match = phy_bus_match;
	mdiodev->addr = addr;
	mdiodev->flags = MDIO_DEVICE_FLAG_PHY;
	mdiodev->device_free = phy_mdio_device_free;
	mdiodev->device_remove = phy_mdio_device_remove;

	...
	dev->link = 1;
	dev->interface = PHY_INTERFACE_MODE_GMII;

	dev->autoneg = AUTONEG_ENABLE;

	...
	dev->phy_id = phy_id; /* PHY ID read from the device's registers */
	...
	dev->irq = bus->irq[addr]; /* PHY_POLL */
	dev_set_name(&mdiodev->dev, PHY_ID_FMT, bus->id, addr); /* name the PHY device object */

	/* work items handling PHY state changes such as link down / up */
	INIT_DELAYED_WORK(&dev->state_queue, phy_state_machine);
	INIT_WORK(&dev->phy_queue, phy_change_work);

	device_initialize(&mdiodev->dev);

	return dev;
}

到此,以太网 PHY 设备的 创建 和 注册 已经完成。
必须说明的是,例子所对应的实际案例中,并没有挂接我们例子中 DTS 配置的 "ethernet-phy-ieee802.3-c22" PHY 设备,实际上挂接在 MAC 设备上的以太网 PHY 芯片,是 RealTek 的 RTL8211F(与上文 DTS 中的 rgmii 模式以及下文的 RTL8211F 驱动相对应)。该 PHY 设备在 MAC 驱动注册 MDIO 总线对象时,在 get_phy_device() 流程中被探测到,并为其创建了对应的设备对象。

4.2.2 以太网的 PHY 设备驱动注册和加载

说完 PHY 设备的创建注册,接下来看 PHY 设备驱动的注册和加载过程。

/* include/linux/phy.h */

/*
 * Generates the module init/exit functions that register and
 * unregister the __count drivers of the __phy_drivers array.
 */
#define phy_module_driver(__phy_drivers, __count)			\
static int __init phy_module_init(void)					\
{									\
	return phy_drivers_register(__phy_drivers, __count, THIS_MODULE); \
}									\
module_init(phy_module_init);						\
static void __exit phy_module_exit(void)				\
{									\
	phy_drivers_unregister(__phy_drivers, __count);			\
}									\
module_exit(phy_module_exit)

/* Convenience form that derives the driver count with ARRAY_SIZE(). */
#define module_phy_driver(__phy_drivers)				\
	phy_module_driver(__phy_drivers, ARRAY_SIZE(__phy_drivers))
/* config_init hook of the RTL8211F driver entry; body is a placeholder in this article. */
static int rtl8211f_config_init(struct phy_device *phydev)
{
	// perform the operations required by the chip datasheet
	return 0;
}

/* ack_interrupt hook of the RTL8211F driver entry; body is a placeholder in this article. */
static int rtl8211f_ack_interrupt(struct phy_device *phydev)
{
	// perform the operations required by the chip datasheet
	return 0;
}

/* config_intr hook of the RTL8211F driver entry; body is a placeholder in this article. */
static int rtl8211f_config_intr(struct phy_device *phydev)
{
	// perform the operations required by the chip datasheet
	return 0;
}

/* PHY driver table of the Realtek module (excerpt showing the RTL8211F entry). */
static struct phy_driver realtek_drvs[] = {
	...
	{
		.phy_id		= 0x001cc916, /* PHY ID */
		.name		= "RTL8211F Gigabit Ethernet",
		.phy_id_mask	= 0x001fffff, /* PHY ID mask */
		.features	= PHY_GBIT_FEATURES,
		.flags		= PHY_HAS_INTERRUPT,
		/* PHY driver callbacks */
		.config_aneg	= &genphy_config_aneg,
		.config_init	= &rtl8211f_config_init,
		.read_status	= &genphy_read_status,
		.ack_interrupt	= &rtl8211f_ack_interrupt,
		.config_intr	= &rtl8211f_config_intr,
		.suspend	= genphy_suspend,
		.resume		= genphy_resume,
	},	
	...
};

module_phy_driver(realtek_drvs); /* registers the PHY drivers on module load and unregisters them on unload */

/* MDIO device ID table exported through MODULE_DEVICE_TABLE(). */
static struct mdio_device_id __maybe_unused realtek_tbl[] = {
	{ 0x001cc916, 0x001fffff }, /* PHY ID and PHY ID mask */
	{ }
};
MODULE_DEVICE_TABLE(mdio, realtek_tbl);
/*
 * Register the n PHY drivers of the new_driver array.
 *
 * If one registration fails, the drivers registered before it are
 * unregistered again in reverse order and the failing error code is
 * returned. Returns 0 when all registrations succeed.
 */
int phy_drivers_register(struct phy_driver *new_driver, int n,
			 struct module *owner)
{
	int idx;
	int rc = 0;

	for (idx = 0; idx < n; idx++) {
		rc = phy_driver_register(&new_driver[idx], owner);
		if (rc)
			goto unwind;
	}
	return rc;

unwind:
	/* idx is the entry that failed; roll back idx-1 .. 0 */
	while (--idx >= 0)
		phy_driver_unregister(&new_driver[idx]);
	return rc;
}

/*
 * Register one PHY driver (excerpt): fills in the embedded
 * device_driver (name, bus, common probe/remove entry points, owner)
 * and registers it with the driver core. Returns the result of
 * driver_register().
 */
int phy_driver_register(struct phy_driver *new_driver, struct module *owner)
{
	int retval;

	new_driver->mdiodrv.flags |= MDIO_DEVICE_IS_PHY;
	new_driver->mdiodrv.driver.name = new_driver->name;
	new_driver->mdiodrv.driver.bus = &mdio_bus_type;
	new_driver->mdiodrv.driver.probe = phy_probe;
	new_driver->mdiodrv.driver.remove = phy_remove;
	new_driver->mdiodrv.driver.owner = owner;

	/* 
	 * Register the PHY driver with the driver core.
	 * If a matching PHY device is already registered this can trigger the driver's probe.
	 */
	retval = driver_register(&new_driver->mdiodrv.driver);
	...

	return retval;
}

假设 PHY 设备已经先注册到 driver core (先后关系是无所谓的,不管是哪种顺序,最终都会触发驱动的加载),注册 PHY 驱动将触发驱动加载过程:

phy_driver_register()
	driver_register(&new_driver->mdiodrv.driver)
		bus_add_driver(drv)
			driver_attach(drv)
				bus_for_each_dev(drv->bus, NULL, drv, __driver_attach)
					while ((dev = next_device(&i)) && !error)
						/* 循环到注册的 PHY 设备时 */
						fn(dev, data) = __driver_attach()
							/* 匹配设备和驱动 */
							driver_match_device(drv, dev)
								mdio_bus_match(dev, drv)
									phy_bus_match(dev, drv)
										 /* 按 phy_id & phy_id_mask 匹配 */
										return (phydrv->phy_id & phydrv->phy_id_mask) == (phydev->phy_id & phydrv->phy_id_mask);
							/* 匹配到设备和驱动,加载驱动 */
							driver_probe_device(drv, dev)
								really_probe(dev, drv)
									dev->driver = drv; /* 绑定设备的驱动 */
									drv->probe(dev) = phy_probe()
/*
 * Common probe entry point shared by all PHY drivers (excerpt): performs
 * the generic PHY initialisation and then calls the specific PHY
 * driver's probe, if it provides one.
 */
static int phy_probe(struct device *dev)
{
	struct phy_device *phydev = to_phy_device(dev);
	struct device_driver *drv = phydev->mdio.dev.driver;
	struct phy_driver *phydrv = to_phy_driver(drv);
	int err = 0;

	phydev->drv = phydrv; /* bind phy_device and phy_driver */

	/* Final decision on the PHY interrupt mode */ 
	if (!(phydrv->flags & PHY_HAS_INTERRUPT) &&
	    phy_interrupt_is_valid(phydev))
		phydev->irq = PHY_POLL;  /* driver lacks PHY_HAS_INTERRUPT: fall back to polling */

	...

	/* Configure the PHY feature set */ 
	phydev->supported = phydrv->features;
	of_set_phy_supported(phydev);
	phydev->advertising = phydev->supported;

	...

	/* Set the state to READY by default */
	phydev->state = PHY_READY; /* mark the PHY device as ready */

	if (phydev->drv->probe)
		err = phydev->drv->probe(phydev); /* probe of the specific PHY driver */

	return err;
}

到此,以太网 PHY 设备的驱动也已经加载,看起来似乎一切都已经结束了,是这样吗?事实上,我们还差一步,就是在软件层面绑定 MAC 和 PHY,让它们一起协作 ,这样才组成了一张完整的以太网卡。

4.3 绑定以太网卡的 MAC 和 PHY

在用户打开网卡时,将触发网卡设备对象的 net_device::netdev_ops->open 接口,即我们例子中的 stmmac_open():

/*
 * 启动网卡:
 * ip link set dev eth0 up
 * ifconfig eth0 up
 */
sockfd = socket(AF_INET, SOCK_DGRAM, IPPROTO_IP);
ioctl(sockfd, SIOCSIFFLAGS, {ifr_name="eth0", ifr_flags=IFF_UP|IFF_BROADCAST|IFF_RUNNING|IFF_MULTICAST})
	sock_ioctl()
		sock_do_ioctl()
			dev_ioctl()
				dev_ifsioc()
					dev_change_flags()
						__dev_change_flags()
							__dev_open()
								/* 调用网卡驱动 open (启动)接口 */
								ops->ndo_open(dev) = stmmac_open(dev)
/*
 * ndo_open handler of the MAC driver (excerpt): binds MAC and PHY when
 * the PCS is not RGMII/TBI/RTBI, starts the attached PHY, requests the
 * MAC interrupt and enables the queues.
 */
static int stmmac_open(struct net_device *dev)
{
	if (priv->hw->pcs != STMMAC_PCS_RGMII &&
	    priv->hw->pcs != STMMAC_PCS_TBI &&
	    priv->hw->pcs != STMMAC_PCS_RTBI) {
	    /* bind MAC and PHY */
	    ret = stmmac_init_phy(dev);
	    ...
	}
	
	...

	/* start the PHY attached to the MAC */
	if (dev->phydev)
		phy_start(dev->phydev);

	/* interrupt used by the MAC for data processing */
	ret = request_irq(dev->irq, stmmac_interrupt,
			  			IRQF_SHARED, dev->name, dev);
	
	...

	/* start MAC transmit / receive */
	stmmac_enable_all_queues(priv);
	stmmac_start_all_queues(priv);

	return 0;
}

/*
 * Connect the MAC's net_device to its PHY through of_phy_connect(),
 * with stmmac_adjust_link() as the link-change handler (excerpt).
 */
static int stmmac_init_phy(struct net_device *dev)
{
	struct phy_device *phydev;

	phydev = of_phy_connect(dev, priv->plat->phy_node,
					&stmmac_adjust_link, 0, interface);
	
	...

	/* 
	 * Emits a kernel log line of the form:
	 * "attached PHY driver [%s] (mii_bus:phy_addr=%s, irq=%s)"
	 * for example:
	 * ADIN1200 stmmac-0:00: attached PHY driver [ADIN1200] (mii_bus:phy_addr=stmmac-0:00, irq=POLL)
	 */
	phy_attached_info(phydev);
	return 0;
}

/*
 * Look up the PHY described by device-tree node phy_np and connect it
 * to the net_device dev.
 *
 * @dev:     network device (MAC) to attach the PHY to
 * @phy_np:  device-tree node of the PHY
 * @hndlr:   link-change handler installed by phy_connect_direct()
 * @flags:   stored in phy->dev_flags before connecting
 * @iface:   PHY <-> MAC interface type
 *
 * Returns the connected phy_device, or NULL when no PHY device is found
 * for phy_np or when phy_connect_direct() fails.
 */
struct phy_device *of_phy_connect(struct net_device *dev,
				  struct device_node *phy_np,
				  void (*hndlr)(struct net_device *), u32 flags,
				  phy_interface_t iface)
{
	struct phy_device *phy = of_phy_find_device(phy_np);
	int ret;

	/* no PHY device registered for this node: nothing to connect */
	if (!phy)
		return NULL;

	phy->dev_flags = flags;

	ret = phy_connect_direct(dev, phy, hndlr, iface);

	/* drop the device reference held since of_phy_find_device() */
	put_device(&phy->mdio.dev);

	return ret ? NULL : phy;
}

/*
 * Attach phydev to dev and start servicing it.
 *
 * After a successful phy_attach_direct() the MAC's link-change handler
 * is installed, the state-machine work is queued, and interrupts are
 * started when phydev->irq is greater than 0.
 *
 * Returns 0 on success or the error from phy_attach_direct().
 */
int phy_connect_direct(struct net_device *dev, struct phy_device *phydev,
		       void (*handler)(struct net_device *),
		       phy_interface_t interface)
{
	int err = phy_attach_direct(dev, phydev, phydev->dev_flags, interface);

	if (err == 0) {
		/* remember the MAC callback used to report link changes */
		phy_prepare_link(phydev, handler);

		/* queue phy_state_machine(); the PHY is serviced from here on */
		phy_start_machine(phydev);

		if (phydev->irq > 0)
			phy_start_interrupts(phydev);
	}

	return err;
}

/*
 * Attach phydev to the net_device dev (excerpt): cross-links the two
 * objects, records flags and interface type, sets the state to
 * PHY_READY, turns the carrier off and initialises the PHY hardware.
 * Returns the result of phy_init_hw().
 */
int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
		      u32 flags, phy_interface_t interface)
{
	int err;
	
	...

	phydev->phy_link_change = phy_link_change;
	/* associate the net_device (MAC layer) with the phy_device (physical layer) */
	phydev->attached_dev = dev; /* MAC this PHY is attached to */
	dev->phydev = phydev; /* PHY this MAC is attached to */
	
	...
	
	phydev->dev_flags = flags;

	phydev->interface = interface; /* PHY <-> MAC data interface type: MII, RMII, ... */

	phydev->state = PHY_READY;

	netif_carrier_off(phydev->attached_dev);

	err = phy_init_hw(phydev);

	phy_resume(phydev); /* resume the PHY device */
	phy_led_triggers_register(phydev);

	return err;
}

/* Store handler as the adjust_link callback of phydev. */
static void phy_prepare_link(struct phy_device *phydev,
			     void (*handler)(struct net_device *))
{
	phydev->adjust_link = handler;
}

/* Queue the PHY state machine work on system_power_efficient_wq with a delay of HZ. */
void phy_start_machine(struct phy_device *phydev)
{
	/* the state_queue work item runs phy_state_machine() */
	queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, HZ);
}

/*
 * Start the PHY (excerpt): moves a PHY_READY device to PHY_UP and then
 * re-triggers the state machine work synchronously.
 */
void phy_start(struct phy_device *phydev)
{
	switch (phydev->state) {
	...
	case PHY_READY:
		phydev->state = PHY_UP; /* state update: PHY_READY -> PHY_UP */
		break;
	...
	}

	phy_trigger_machine(phydev, true);
}

/*
 * Cancel any pending state machine work and queue it again without
 * delay. With sync set the cancellation uses
 * cancel_delayed_work_sync(), otherwise cancel_delayed_work().
 */
void phy_trigger_machine(struct phy_device *phydev, bool sync)
{
	struct delayed_work *dwork = &phydev->state_queue;

	if (!sync)
		cancel_delayed_work(dwork);
	else
		cancel_delayed_work_sync(dwork);

	/* re-queue the state machine work with zero delay */
	queue_delayed_work(system_power_efficient_wq, dwork, 0);
}

从此, PHY 和 MAC 一起愉快地玩耍了。PHY 工作在其状态机 work 函数 phy_state_machine() 中。

4.4 以太网卡 PHY 和 MAC 的协作

以太网卡 PHY 管理了 连接状态、和对端通信速度的自动协商 等工作,作为一个网卡内外部沟通的桥梁:对外连接了网线,对内连接着 MAC。我们来看 PHY 是如何工作的。现在我们知道 PHY 工作在状态机 work 函数 phy_state_machine() 中:

/*
 * Delayed-work handler that advances the PHY state machine.
 *
 * The phy_device is recovered from the embedded state_queue work item.
 * The current state is processed under phydev->lock; afterwards, outside
 * the lock, autonegotiation is started or the PHY is suspended if the
 * state handling asked for it, errors are reported through phy_error(),
 * and the work re-queues itself when the PHY is polled.
 */
void phy_state_machine(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct phy_device *phydev =
			container_of(dwork, struct phy_device, state_queue);
	bool needs_aneg = false, do_suspend = false;
	enum phy_state old_state;
	int err = 0;
	int old_link;

	mutex_lock(&phydev->lock);

	old_state = phydev->state;

	/* invoke the PHY driver's optional link_change_notify hook */
	if (phydev->drv && phydev->drv->link_change_notify)
		phydev->drv->link_change_notify(phydev);

	/* the PHY state machine proper */
	switch (phydev->state) {
	case PHY_DOWN:
	case PHY_STARTING:
	case PHY_READY:
	case PHY_PENDING:
		break;
	case PHY_UP:
		needs_aneg = true;

		phydev->link_timeout = PHY_AN_TIMEOUT;

		break;
	case PHY_AN:
		err = phy_read_status(phydev);
		if (err < 0)
			break;

		/* If the link is down, give up on negotiation for now */
		if (!phydev->link) {
			phydev->state = PHY_NOLINK;
			phy_link_down(phydev, true);
			break;
		}

		/* Check if negotiation is done.  Break if there's an error */
		err = phy_aneg_done(phydev);
		if (err < 0)
			break;

		/* If AN is done, we're running */
		if (err > 0) {
			phydev->state = PHY_RUNNING;
			phy_link_up(phydev);
		} else if (0 == phydev->link_timeout--)
			needs_aneg = true;
		break;
	case PHY_NOLINK:
		if (phy_interrupt_is_valid(phydev))
			break;

		err = phy_read_status(phydev);
		if (err)
			break;

		if (phydev->link) {
			if (AUTONEG_ENABLE == phydev->autoneg) {
				err = phy_aneg_done(phydev);
				if (err < 0)
					break;

				if (!err) {
					phydev->state = PHY_AN;
					phydev->link_timeout = PHY_AN_TIMEOUT;
					break;
				}
			}
			phydev->state = PHY_RUNNING;
			phy_link_up(phydev);
		}
		break;
	case PHY_FORCING:
		err = genphy_update_link(phydev);
		if (err)
			break;

		if (phydev->link) {
			phydev->state = PHY_RUNNING;
			phy_link_up(phydev);
		} else {
			if (0 == phydev->link_timeout--)
				needs_aneg = true;
			phy_link_down(phydev, false);
		}
		break;
	case PHY_RUNNING:
		/* Only register a CHANGE if we are polling and link changed
		 * since latest checking.
		 */
		if (phydev->irq == PHY_POLL) {
			old_link = phydev->link;
			err = phy_read_status(phydev);
			if (err)
				break;

			if (old_link != phydev->link)
				phydev->state = PHY_CHANGELINK;
		}
		/*
		 * Failsafe: check that nobody set phydev->link=0 between two
		 * poll cycles, otherwise we won't leave RUNNING state as long
		 * as link remains down.
		 */
		if (!phydev->link && phydev->state == PHY_RUNNING) {
			phydev->state = PHY_CHANGELINK;
			phydev_err(phydev, "no link in PHY_RUNNING\n");
		}
		break;
	case PHY_CHANGELINK:
		err = phy_read_status(phydev);
		if (err)
			break;

		if (phydev->link) {
			phydev->state = PHY_RUNNING;
			phy_link_up(phydev);
		} else {
			phydev->state = PHY_NOLINK;
			phy_link_down(phydev, true);
		}

		if (phy_interrupt_is_valid(phydev))
			err = phy_config_interrupt(phydev,
						   PHY_INTERRUPT_ENABLED);
		break;
	case PHY_HALTED:
		if (phydev->link) {
			phydev->link = 0;
			phy_link_down(phydev, true);
			do_suspend = true;
		}
		break;
	case PHY_RESUMING:
		if (AUTONEG_ENABLE == phydev->autoneg) {
			err = phy_aneg_done(phydev);
			if (err < 0)
				break;

			/* err > 0 if AN is done.
			 * Otherwise, it's 0, and we're  still waiting for AN
			 */
			if (err > 0) {
				err = phy_read_status(phydev);
				if (err)
					break;

				if (phydev->link) {
					phydev->state = PHY_RUNNING;
					phy_link_up(phydev);
				} else	{
					phydev->state = PHY_NOLINK;
					phy_link_down(phydev, false);
				}
			} else {
				phydev->state = PHY_AN;
				phydev->link_timeout = PHY_AN_TIMEOUT;
			}
		} else {
			err = phy_read_status(phydev);
			if (err)
				break;

			if (phydev->link) {
				phydev->state = PHY_RUNNING;
				phy_link_up(phydev);
			} else	{
				phydev->state = PHY_NOLINK;
				phy_link_down(phydev, false);
			}
		}
		break;
	}

	mutex_unlock(&phydev->lock);

	if (needs_aneg) /* autonegotiation was requested above */
		err = phy_start_aneg_priv(phydev, false); /* start autonegotiation */
	else if (do_suspend)
		phy_suspend(phydev);

	if (err < 0)
		phy_error(phydev);

	if (old_state != phydev->state)
		phydev_dbg(phydev, "PHY state change %s -> %s\n",
			   phy_state_to_str(old_state),
			   phy_state_to_str(phydev->state));

	/* Only re-schedule a PHY state machine change if we are polling the
	 * PHY, if PHY_IGNORE_INTERRUPT is set, then we will be moving
	 * between states from phy_mac_interrupt()
	 */
	if (phydev->irq == PHY_POLL)
		queue_delayed_work(system_power_efficient_wq, &phydev->state_queue,
				   PHY_STATE_TIME * HZ); /* re-queue the PHY state machine work */
}

PHY 要处理的事务较多,这里不一一列举,仅就连接状态变化来做一下说明:

/* Report link-up through the phy_link_change callback, then update the speed LED trigger. */
static void phy_link_up(struct phy_device *phydev)
{
	phydev->phy_link_change(phydev, true, true); /* phy_link_change() */
	phy_led_trigger_change_speed(phydev);
}

/*
 * Propagate a link change to the attached net_device.
 *
 * When do_carrier is set the carrier is switched on or off according to
 * up; in every case the MAC's adjust_link callback is invoked.
 */
static void phy_link_change(struct phy_device *phydev, bool up, bool do_carrier)
{
	struct net_device *ndev = phydev->attached_dev;

	if (do_carrier && up)
		netif_carrier_on(ndev);
	else if (do_carrier)
		netif_carrier_off(ndev);

	/* tell the MAC driver through the handler it registered, e.g. stmmac_adjust_link() */
	phydev->adjust_link(ndev);
}

/* Link-change handler of the MAC driver (excerpt). */
static void stmmac_adjust_link(struct net_device *dev)
{
	/* MAC-side handling of the link state change ... */

	/* 
	 * This is where the familiar link state lines in the kernel log
	 * come from. With phy_print_status() they have the form:
	 * link up:   "Link is Up - <speed>/<duplex> - flow control <rx/tx|off>"
	 * link down: "Link is Down"
	 */
	if (new_state && netif_msg_link(priv))
		phy_print_status(phydev);
		
	...
}

/* drivers/net/phy/phy.c */
/*
 * Log the current link state of phydev against its attached net_device:
 * speed, duplex and flow control when the link is up, a plain
 * "Link is Down" otherwise.
 */
void phy_print_status(struct phy_device *phydev)
{
	struct net_device *ndev = phydev->attached_dev;

	if (!phydev->link) {
		netdev_info(ndev, "Link is Down\n");
		return;
	}

	netdev_info(ndev,
		"Link is Up - %s/%s - flow control %s\n",
		phy_speed_to_str(phydev->speed),
		phy_duplex_to_str(phydev->duplex),
		phydev->pause ? "rx/tx" : "off");
}

4.5 以太网卡 PHY 驱动示范

写一个 以太网卡 PHY 驱动很简单,驱动框架如下:

/* phy_driver callbacks that may need to be implemented go here */

/* Skeleton PHY driver table; "......" marks further callbacks left to the implementer. */
static struct phy_driver xxx_phy_drvs[] = {
	/* implement the callbacks the PHY chip actually requires */
	{
		.phy_id         = 0x00008201,
		.name           = "RTL8201CP Ethernet",
		.phy_id_mask    = 0x0000ffff,
		.features       = PHY_BASIC_FEATURES,
		.flags          = PHY_HAS_INTERRUPT,
		.config_aneg    = &genphy_config_aneg,
		.read_status    = &genphy_read_status,
		......
	}, 
};

module_phy_driver(xxx_phy_drvs);

/*
 * MDIO device ID table exported through MODULE_DEVICE_TABLE().
 * Each entry is { PHY ID, PHY ID mask } and must mirror the
 * phy_id / phy_id_mask pair of the corresponding xxx_phy_drvs entry.
 */
static struct mdio_device_id __maybe_unused xxx_phy_id_tbl[] = {
	{ 0x00008201, 0x0000ffff },
	{ }
};

MODULE_DEVICE_TABLE(mdio, xxx_phy_id_tbl);

5. FAQ

Q. 系统当中明明只挂接了一个网络 PHY 芯片,为什么会被两次扫描到?如出现类似下面的日志:
[    1.230635] davinci_mdio 4a101000.mdio: phy[0]: device 4a101000.mdio:00, driver YT8531 Gigabit Ethernet
[    1.240171] davinci_mdio 4a101000.mdio: phy[1]: device 4a101000.mdio:01, driver YT8531 Gigabit Ethernet
A. 有可能设备的广播地址是 0 。
   像问题中的 YT8531 芯片, 它的广播地址就是 0,而它实际挂接的地址是 1,MDIO 从 0 往高地址扫描,
   先扫描 0 地址,后扫描 1 地址,所以 PHY 会被两次扫描到。

Q. 对比同一款 PHY,人家做出来的效果,插拔网线连接速度飞快,而我的有时候重新连接居然耗时 30s ?
A. 有些 PHY 支持一些节能模式(如 RTL8211F 的 EEE),会关闭部分电路,重新启动会比较耗时,关闭这些特性即可。
   当然,有时候上电复位不良,也可能导致类似问题。

6. 参考资料

[1] AN 796: Cyclone® V和 Arria® V SoC 器件设计指南,4.5.1.1.1. RGMII

;