Linux管道的实现（c语言）

Linux管道(|)的实现（c语言）

重定向（redirection）

在操作系统中，shell提供了非常方便的重定向命令：

当shell运行一个程序时，它会打开三个文件描述符：0,1,2，默认情况下，它们被连接到终端。例如0是标准输入（键盘），1是标准输出（控制台），2是标准错误输出（控制台）。

管道（pipe）

与其直接将标准输出与设备（如屏幕）相连接，Linux将文件描述符指向内核提供的一片缓冲区。对于命令 `LHS | RHS`，管道的写入端与LHS program相连接，由它生产数据；管道的读取端与RHS program相连接，消费LHS program的输出。

管道是一个连接两个进程输入输出的缓冲区，这个缓冲区在内存中。文件描述符指向这个管道，管道负责对输入输出（读写操作）进行缓冲。

如图所示，管道将ls进程的标准输出（文件描述符1）与more进程的标准输入（文件描述符0）相连接，充当缓冲区的作用。意思是，ls产生的标准输出将被重定向到管道pipe的缓冲区，more会不断消费这个缓冲区的内容。

文件描述符

文件描述符通常对每个进程都是唯一的，但是它们可以被通过fork产生的子进程所共享，或者被fcntl, dup, dup2函数拷贝。

dup函数

int dup(int oldfd);

函数创建一个文件描述符的拷贝，并且将该文件的引用计数加1，返回最小的可用的文件描述符数字。

dup2函数

int dup2(int oldfd, int newfd);

该函数与dup()函数做同样的工作，但是并不是使用最小的可用文件描述符，而是使用newfd所代表的文件描述符数字。换言之，文件描述符 newfd 进行了调整，指向了oldfd所指向的文件。

如果newfd已经被使用，则先关闭该文件，然后再被重用。相比于先关闭文件描述符，再使用dup()复制，这个重用操作是原子性的。

例如，文件描述符`fd`已经打开了某个文件，则该代码可将标准输出重定向到这个文件。

#include <unistd.h>
#include <stdio.h>
#include <fcntl.h>
#include <stdlib.h>
/*
 * Redirects standard output to "my_new_file.txt" and prints one line,
 * which therefore ends up in the file instead of on the console.
 *
 * The file must already exist: it is opened write-only in append mode
 * without O_CREAT. Returns EXIT_FAILURE if the file cannot be opened or
 * the redirection fails, 0 otherwise.
 */
int main(void)
{
    int fd = open("my_new_file.txt", O_APPEND | O_WRONLY); // open the existing file for appending
    if (fd < 0)
    {
        perror("Error opening the file");
        return EXIT_FAILURE; // do not go on to redirect stdout to an invalid descriptor
    }

    if (dup2(fd, STDOUT_FILENO) == -1) // now STDOUT_FILENO refers to "my_new_file.txt"
    {
        perror("dup2");
        close(fd);
        return EXIT_FAILURE;
    }
    close(fd); // the original descriptor is no longer needed; stdout keeps the file open

    printf("This message will be printed to the my_new_file.txt, instead of the console.\n");
    return 0;
}

多进程

Linux提供了fork等方式来创建一个新的进程，并提供execve等函数让进程执行一个新的程序。

fork函数

pid_t fork(void);

fork()函数通过复制调用进程（父进程）来创建一个新的进程（子进程）。子进程和父进程在独立的内存空间内运行。在fork()函数执行的瞬间，子父进程的内存空间内容是相同的。

返回值：成功时，子进程中返回0，父进程中返回子进程的PID；失败时，父进程中返回-1，不会创建子进程，并设置errno。

例如，通过返回值判断子父进程，并执行不同的操作：

#include <unistd.h>
#include <stdio.h>
#include <sys/types.h>
/*
 * Forks once and prints a different message depending on whether the code
 * is running in the child, in the parent, or fork() failed.
 */
int main(void)
{
    pid_t return_code = fork(); // 0 in the child, the child's PID in the parent, -1 on failure
    if (return_code == 0)
    {
        printf("This is the child process !\n");
    }
    else if (return_code > 0)
    {
        printf("This is the parent process ! \n");
    }
    else
    {
        printf("Child process creation error ! \n");
    }
    return 0;
}

execve函数

int execve(const char *pathname, char *const argv[], char *const envp[]);

execve()执行pathname路径名所代表的程序，这将会导致调用进程中运行的程序被该程序所替换，并且重新初始化内存空间（堆栈、数据段）。

argv[]为参数列表，以NULL结尾。

envp[]是环境变量列表，格式为：name=value。

当运行成功后，它不会返回任何东西，因为原进程的程序已经被替换。具体地，该函数只会在发生错误时返回-1，并设置errno。

例子：

#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
/*
 * Replaces the current process image with `/bin/ls -aF /`, run with an
 * empty environment. Control only comes back here if execve() fails, in
 * which case the error is reported and the process exits with status 1.
 */
int main(int argc, char **argv)
{
    char *args[] = {"ls", "-aF", "/", NULL}; /* one element per command-line argument, NULL-terminated */
    char *env[] = {NULL};                    /* leave the environment list empty */

    printf("About to run /bin/ls\n");
    /* A successful execve() discards unflushed stdio buffers, so flush now;
     * otherwise the message is lost when stdout is a file or a pipe. */
    fflush(stdout);

    execve("/bin/ls", args, env);
    perror("execve"); /* if we get here, execve failed */
    exit(1);
}

尽管execve()函数非常强大，但是有其他封装在该函数之上的其他函数，例如execlp()，它们更加简单易用。

execlp函数

int execlp(const char *file, const char *arg, ...);

该函数与execve()大同小异，但是它无需传递环境变量，默认使用PATH环境变量进行file的搜索。其参数以参数列表的形式逐个传递，并以(char*)NULL结尾。

执行ls命令的代码如下。注意，按照惯例，参数列表的第一个参数（argv[0]）是程序名称：

/* Run `ls` found via PATH; by convention the first argument repeats the
 * program name. execlp() only returns (with -1) when it fails. */
if (execlp("ls", "ls", (char *)NULL) == -1) {
    return errno;
}

pipe函数

int pipe(int fd[2]);

pipe()函数用于进程间通信，pipe只能用于单向通信，一个进程写，另一个进程读。
如果一个进程想要在另一个进程写入之前从管道中读取，则该进程将被暂停直到有内容被写入管道（类似于调用scanf()从控制台读取输入时的等待过程）。
其中fd[0]为读端的文件描述符，fd[1]为写端的文件描述符。
成功返回0，失败返回-1。

Linux管道命令(|)的实现

程序的参数为命令的列表，例如：

./pipe ls cat wc

主进程将会遍历参数列表，逐个创建子进程对命令进行处理。
子进程将会用管道将输入输出串联在一起。
第一个子进程从标准输入读取数据。
最后一个子进程向标准输出输出结果。
可以让子进程输出错误到stderr。

首先使用calloc创建子进程pid的列表，方便后续wait操作。

pid_t *pids = (pid_t *)calloc(argc, sizeof(pid_t));

遍历所有命令，为每个命令执行fork()。

如果返回负值，报告错误。
如果返回0，写子进程代码。
如果返回大于0，写父进程代码。

在fork()之前，我们需要创建管道用来重定向子进程的输入输出。

/* fds[0] receives the read end of the new pipe, fds[1] the write end. */
int fds[2];
if (pipe(fds) == -1)
{
    int saved_errno = errno; /* capture before perror() gets a chance to change it */
    perror("pipe create error");
    return saved_errno;
}

该代码创建一个管道，其中fds[0]为读端，fds[1]为写端。

所以我们要做的是，把进程的输出绑定到管道的写端，同时将下一个进程的输入绑定到管道的读端，这样前一个进程的输出就被传送到了后一个进程的输入。

思路清晰后，我们需要知道第一个进程的输入应该绑定到哪里。没错，应该是标准输入。同理最后一个进程的输出绑定标准输出。

/* Spawn one child per command: argv[1] .. argv[argc - 1]. */
for (int i = 1; i < argc; i++)
{
    // create the pipe
    // ...

    // fork the child process
    pid_t ret = fork();
    if (ret < 0)
    {
        perror("fork error");
        return errno;
    }
    else if (ret == 0) // child
    {
        // bind stdin to the previous process's output, i.e. the read end of the previous pipe
        // ...
        // bind stdout to the write end of the newly created pipe
        // ...
        return 0;
    }
    else // parent
    {
        // Record the child's PID here: fork() returns it only in the parent,
        // and a store made in the child would only change the child's private
        // copy of pids, leaving the parent's array empty for the later wait.
        pids[i] = ret;
        // ...
    }
}

我们首先绑定子进程的输出到管道的写端：

/* Spawn one child per command and point its stdout at the new pipe. */
for (int i = 1; i < argc; i++)
{
    // create the pipe
    // ...

    // fork the child process
    pid_t ret = fork();
    if (ret < 0)
    {
        perror("fork error");
        return errno;
    }
    else if (ret == 0) // child
    {
        // bind stdin to the previous process's output, i.e. the read end of the previous pipe
        // ...
        // bind stdout to the write end of the newly created pipe
        dup2(fds[1], STDOUT_FILENO);
        close(fds[1]); // stdout now refers to the pipe; drop the duplicate descriptor
        return 0;
    }
    else // parent
    {
        // Record the child's PID here: fork() returns it only in the parent,
        // and a store made in the child would only change the child's private
        // copy of pids, leaving the parent's array empty for the later wait.
        pids[i] = ret;
        // ...
    }
}

由于我们不知道前一个管道的读端文件描述符是什么，所以这里我们需要一个变量来存储这个文件描述符，命名为previous_out_fd，我们只需要让父进程维护这个变量，每次fork()之后将fds[0]保存到这个变量中即可。

重定向子进程的标准输入到上一个管道的读端：

/* Spawn one child per command; each child reads from the previous pipe
 * (previous_out_fd) and writes into the pipe created for it. */
int fds[2];
for (int i = 1; i < argc; i++)
{
    // create the pipe
    if (pipe(fds) == -1)
    {
        perror("pipe create error");
        return errno;
    }
    // fork the child process
    pid_t ret = fork();
    if (ret < 0)
    {
        perror("fork error");
        return errno;
    }
    else if (ret == 0) // child
    {
        // bind stdin to the previous process's output, i.e. the read end of the previous pipe
        dup2(previous_out_fd, STDIN_FILENO);
        close(previous_out_fd); // stdin now refers to it; drop the duplicate descriptor
        close(fds[0]); // the child never reads from the pipe it writes to

        // bind stdout to the write end of the newly created pipe
        dup2(fds[1], STDOUT_FILENO);
        close(fds[1]); // stdout now refers to it; drop the duplicate descriptor
        return 0;
    }
    else // parent
    {
        // fork() returns the child's PID only in the parent; a store made in
        // the child would never reach the parent's copy of pids.
        pids[i] = ret;

        // The child owns its own copy of the old read end now. Close ours so
        // descriptors don't pile up and so an upstream writer gets SIGPIPE
        // once its real reader is gone.
        close(previous_out_fd);

        /* save the read end, which is the input of the next process */
        previous_out_fd = fds[0];

        close(fds[1]); // only the child writes to this pipe

        /* fds[0] must stay open here: the next child inherits it as its stdin */
    }
}

注意我们不能在fork()之前保存fds[0]的值，因为如果这样子进程中previous_out_fd的值就成了新创建的管道的读端了，我们要的是前一个管道的读端文件描述符。

重定向最后一个进程的输出到标准输出

由于父进程的标准输出本来就是tty，即控制台，因此最后一个子进程的标准输出不需要重定向，直接继承父进程即可。我们在循环中判断if (i != argc - 1)，当到达最后一个子进程时不修改其标准输出。

/* Spawn one child per command; each child reads from the previous pipe
 * (previous_out_fd). Every child except the last writes into the pipe
 * created for it; the last one keeps the inherited stdout. */
int fds[2];
for (int i = 1; i < argc; i++)
{
    // create the pipe
    if (pipe(fds) == -1)
    {
        perror("pipe create error");
        return errno;
    }
    // fork the child process
    pid_t ret = fork();
    if (ret < 0)
    {
        perror("fork error");
        return errno;
    }
    else if (ret == 0) // child
    {
        // bind stdin to the previous process's output, i.e. the read end of the previous pipe
        dup2(previous_out_fd, STDIN_FILENO);
        close(previous_out_fd); // stdin now refers to it; drop the duplicate descriptor
        close(fds[0]); // the child never reads from the pipe it writes to

        // bind stdout to the write end of the new pipe, except for the last command
        if (i != argc - 1)
            dup2(fds[1], STDOUT_FILENO);
        close(fds[1]); // either duplicated onto stdout or not needed at all

        return 0;
    }
    else // parent
    {
        // fork() returns the child's PID only in the parent; a store made in
        // the child would never reach the parent's copy of pids.
        pids[i] = ret;

        // The child owns its own copy of the old read end now. Close ours so
        // descriptors don't pile up and so an upstream writer gets SIGPIPE
        // once its real reader is gone.
        close(previous_out_fd);

        /* save the read end, which is the input of the next process */
        previous_out_fd = fds[0];

        close(fds[1]); // only the child writes to this pipe

        /* fds[0] must stay open here: the next child inherits it as its stdin */
    }
}

用管道连接好各个子进程的输入输出后，我们就可以在子进程中执行程序代码了。我们这里直接使用前面提到的execlp()函数覆盖掉子进程原来的程序代码，直接执行新的程序。

/* Spawn one child per command, wire its stdin/stdout to the neighbouring
 * pipes, then replace the child with the command via execlp(). */
int fds[2];
for (int i = 1; i < argc; i++)
{
    // create the pipe
    if (pipe(fds) == -1)
    {
        perror("pipe create error");
        return errno;
    }
    // fork the child process
    pid_t ret = fork();
    if (ret < 0)
    {
        perror("fork error");
        return errno;
    }
    else if (ret == 0) // child
    {
        // bind stdin to the previous process's output, i.e. the read end of the previous pipe
        dup2(previous_out_fd, STDIN_FILENO);
        close(previous_out_fd); // stdin now refers to it; drop the duplicate descriptor
        close(fds[0]); // the child never reads from the pipe it writes to

        // bind stdout to the write end of the new pipe, except for the last command
        if (i != argc - 1)
            dup2(fds[1], STDOUT_FILENO);
        close(fds[1]); // either duplicated onto stdout or not needed at all

        // execlp() only returns when it failed
        execlp(argv[i], argv[i], (char *)NULL);
        int exec_errno = errno; // capture before fprintf() can overwrite it
        fprintf(stderr, "pipe: %s: %s\n", argv[i], strerror(exec_errno));
        return exec_errno;
    }
    else // parent
    {
        // fork() returns the child's PID only in the parent; a store made in
        // the child would never reach the parent's copy of pids.
        pids[i] = ret;

        // The child owns its own copy of the old read end now. Close ours so
        // descriptors don't pile up and so an upstream writer gets SIGPIPE
        // once its real reader is gone.
        close(previous_out_fd);

        /* save the read end, which is the input of the next process */
        previous_out_fd = fds[0];

        close(fds[1]); // only the child writes to this pipe

        /* fds[0] must stay open here: the next child inherits it as its stdin */
    }
}

最后，为了防止僵尸进程，我们需要等待进程执行完毕，并且检测其返回值，如果出现错误，返回该错误代码。

int status;
for (int i = 1; i < argc; i++)
{
    waitpid(pids[i], &status, 0);
    int ret = WEXITSTATUS(status);
    if (ret != 0)
        return ret;
}

完整代码如下：

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <wait.h>
#include <string.h>

/*
 * Emulates the shell pipeline `argv[1] | argv[2] | ... | argv[argc - 1]`.
 *
 * Every argument is a bare command name (it receives no arguments of its
 * own) that execlp() looks up via PATH. The first command reads from this
 * process's stdin, the last one writes to this process's stdout, and each
 * neighbouring pair is connected through a pipe.
 *
 * Returns -1 when no command is given, EXIT_FAILURE when setup fails, the
 * errno value when pipe()/fork() fails, and otherwise the status of the
 * first command that did not exit with 0 (0 if all of them succeeded).
 */
int main(int argc, char *argv[])
{
	if (argc <= 1)
	{
		printf("arguments should be at least 1\n");
		return -1;
	}

	/* pids[i] holds the PID of the child running argv[i]; slot 0 is unused */
	pid_t *pids = calloc((size_t)argc, sizeof *pids);
	if (pids == NULL)
	{
		perror("calloc");
		return EXIT_FAILURE;
	}

	/* Descriptor the next child uses as stdin. It starts as a duplicate of
	 * our own stdin: every child closes previous_out_fd after dup2(), which
	 * would close its real stdin if this were STDIN_FILENO itself.
	 */
	int previous_out_fd = dup(STDIN_FILENO);
	if (previous_out_fd == -1)
	{
		perror("dup");
		free(pids);
		return EXIT_FAILURE;
	}

	int fds[2];
	for (int i = 1; i < argc; i++)
	{
		if (pipe(fds) == -1)
		{
			int saved_errno = errno; /* capture before perror() can change it */
			perror("pipe create error");
			free(pids);
			return saved_errno;
		}
		pid_t ret = fork();
		if (ret < 0)
		{
			int saved_errno = errno;
			perror("fork error");
			free(pids);
			return saved_errno;
		}
		else if (ret == 0) // child
		{
			/* read from the previous command's output (or the original stdin) */
			if (dup2(previous_out_fd, STDIN_FILENO) == -1)
			{
				perror("dup2 stdin");
				return EXIT_FAILURE;
			}
			close(previous_out_fd);

			close(fds[0]); // the child never reads from the pipe it writes to

			/* every command but the last writes into the new pipe; the
			 * last one keeps the stdout inherited from the parent
			 */
			if (i != argc - 1 && dup2(fds[1], STDOUT_FILENO) == -1)
			{
				perror("dup2 stdout");
				return EXIT_FAILURE;
			}
			close(fds[1]); // either duplicated onto stdout or not needed

			/* execlp() only returns when it failed */
			execlp(argv[i], argv[i], (char *)NULL);
			int exec_errno = errno; /* capture before fprintf() can change it */
			fprintf(stderr, "pipe: %s: %s\n", argv[i], strerror(exec_errno));
			return exec_errno;
		}
		else // parent
		{
			/* fork() returns the child's PID only here; a store made in the
			 * child would never reach the parent's copy of pids
			 */
			pids[i] = ret;

			/* the child has its own copy of the old read end; closing ours
			 * avoids leaking descriptors and lets an upstream writer get
			 * SIGPIPE once its real reader is gone
			 */
			close(previous_out_fd);

			/* save the read end, which is the input of the next process */
			previous_out_fd = fds[0];

			close(fds[1]); // only the child writes to this pipe
		}
	}

	/* read end of the pipe created for the last command; nobody uses it */
	close(previous_out_fd);

	/* reap every child so none is left as a zombie; report the first failure */
	int exit_code = 0;
	for (int i = 1; i < argc; i++)
	{
		int status;
		if (waitpid(pids[i], &status, 0) == -1)
		{
			perror("waitpid");
			if (exit_code == 0)
				exit_code = EXIT_FAILURE;
			continue;
		}
		if (exit_code != 0)
			continue;

		if (WIFEXITED(status))
			exit_code = WEXITSTATUS(status); // only meaningful after a normal exit
		else if (WIFSIGNALED(status))
			exit_code = 128 + WTERMSIG(status); // shell convention for death by signal
	}

	free(pids);
	return exit_code;
}

gitee: https://gitee.com/duguxt/simple-pipe.git