Linux进程（五）—— 进程与文件系统

文件 = 内容 + 属性

对文件的操作，也就是对内容的操作或者对属性的操作。

当文件没有被操作的时候，文件一般会在磁盘上。

当我们对文件进行操作的时候，文件需要提前被load到内存，至少要有属性。

操作文件的时候，第一件事是打开文件，所以打开文件本质就是将需要的文件属性加载到内存中，OS内部一定会同时存在大量被打开的文件。

操作系统管理文件的方式：先描述，再组织。

先描述，就是构建在内存中的文件结构体 struct file{属性(可以从磁盘获取), struct file *next} 。

每一个被打开的文件，都要在OS内对应文件对象的struct结构体，可以将所有的struct file结构体用某种数据结构链接起来，在OS内部，对被打开的文件进行管理，就被转换成了对链表的增删查改。(文件被打开，OS就要为被打开的文件创建对应的内核数据结构)
```
struct file
{
		// attributes of the opened file (can be loaded from disk)
		// link fields (e.g. a next pointer) that chain this object to the other open files
};
```

文件其实可以被分为两大类：(1)磁盘文件，(2)被打开文件(内存文件).

文件是被OS打开的，但是是用户(以进程为代表的)让OS打开的。

之前所有的文件操作，都是进程(struct task_struct)和被打开文件(struct file)之间的关系。

C语言文件操作

#include <stdio.h>

FILE *fopen(const char *path,const char *mode);
//w:默认写方式打开文件，如果文件不存在，就创建它。
//默认如果只是打开，文件内容会自动被清空。 
//同时，每次进行写入的时候，都会从最开始进行写入。

int printf(const char *format, ...);   //默认向显示器打印消息
int fprintf(FILE *stream, const char *format, ...);  //指定文件流，向指定文件打印
int sprintf(char *str, const char *format, ...);   //不安全
int snprintf(char *str, size_t size, const char *format, ...);

int fclose(FILE *fp);

#include <stdio.h>

#define TEXT "text.txt"

/*
 * Demonstrates the C stdio file API: opens TEXT for reading and echoes it
 * to stdout line by line.
 *
 * fopen() modes used in this walkthrough:
 *   "w" - write; creates the file if it does not exist, truncates it if it
 *         does, and writing starts from the beginning.
 *   "a" - append; existing content is kept and every write goes to the end.
 *   "r" - read; the file must already exist.
 *
 * Returns 0 on success, 1 if TEXT cannot be opened.
 */
int main()
{
	//FILE *fp = fopen(TEXT, "w");
	//FILE *fp = fopen(TEXT, "a");
	FILE *fp = fopen(TEXT, "r");
	if(fp == NULL)   // compare, do not assign: `fp = NULL` would throw the stream away
	{
		perror("fopen");
		return 1;    // without a stream there is nothing to read
	}

	// Read: fgets returns NULL at end of file (or on error), which ends the loop.
	char line[128];
	while(fgets(line,sizeof(line),fp) != NULL)
	{
		// fgets keeps the line's trailing newline, so none is added here.
		printf("%s",line);
	}

	// Write (switch the fopen mode above to "w" or "a" before enabling this):
	//const char *message = "hello world";
	//for(int n = 5;n;n--)
	//{
		//fputs(message,fp);
		//fprintf(fp,"%d :%s\n",n,message);
		//fprintf(stdout,"%d :%s\n",n,message);  // everything is a file on Linux; stdout is the display's file

		//char buffer[256];
		//snprintf(buffer,sizeof(buffer),"%d :%s\n",n,message);
		//fputs(buffer,fp);
	//}

	fclose(fp);
	return 0;
}

系统文件操作

//打开文件
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

int open(const char *pathname, int flags);
//成功时返回文件描述符(详见下文“文件描述符”一节)，失败返回-1
int open(const char *pathname, int flags, mode_t mode);
int creat(const char *pathname, mode_t mode);

//关闭文件
#include <unistd.h>
int close(int fd);

//权限掩码
#include <sys/types.h>
#include <sys/stat.h>
mode_t umask(mode_t mask);
//设置当前进程自己的umask并返回旧值；系统默认的umask和进程自己的umask同时存在时，按就近原则，使用进程自己的

//写入
#include <unistd.h>
ssize_t write(int fd, const void *buf, size_t count);

//读取
#include <unistd.h>
ssize_t read(int fd, void *buf, size_t count);
//返回读取到的字节数，如果读到文件结尾返回0，失败返回-1

#include <stdio.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>

#define TEXT "text.txt"

/*
 * Demonstrates the system-call file API: opens TEXT for appending (creating
 * it if necessary) and writes five numbered lines with write().
 *
 * Returns 0 on success, 1 if open() or write() fails.
 */
int main()
{
	//fopen(TEXT,"w");
	// The C library functions are implemented on top of these system calls.
	umask(0);   // clear this process's umask so the 0666 below is not masked

	// O_CREAT | O_WRONLY alone does not clear the existing content.
	//int fd = open(TEXT,O_CREAT | O_WRONLY, 0666);
	//int fd = open(TEXT,O_CREAT | O_WRONLY | O_TRUNC, 0666);	 // O_TRUNC empties the file
	// O_APPEND has to be combined with O_WRONLY: a descriptor opened O_RDONLY
	// cannot be written to, so the write() calls below would all fail.
	int fd = open(TEXT,O_CREAT | O_WRONLY | O_APPEND, 0666);

	// strerror() turns errno into text; errno is only meaningful when open() failed.
	printf("fd: %d, errno: %d, errstring: %s",fd,errno,strerror(errno));
	if(fd == -1)
	{
		return 1;   // no valid descriptor to write to
	}

	const char *message = "hello world";
	for(int n = 5;n;n--)
	{
		char line[128];
		snprintf(line,sizeof(line),"%d :%s\n",n,message);
		// Write strlen() bytes, not strlen()+1: the '\0' terminator is a rule of
		// C strings, not something the file needs to contain.
		if(write(fd,line,strlen(line)) == -1)
		{
			perror("write");
			close(fd);
			return 1;
		}
	}

	close(fd);

	return 0;
}

OS一般如何让用户给自己传递标志位的

//我们自己传标志位
int XXX(int flag,int flag1,int flag2)  //如果想同时传递多个标志位不方便。

//系统传递标志位
int YYY(int flag)   
//flag是int类型，有32个比特位，可以用一个比特位表示一个标志位。
//这种方式称为位图。

C/C++的库都是对系统调用的封装。

只要是要访问硬件，或者操作系统内的一些资源，都必须要调系统调用。

文件描述符

任意一个进程，在启动的时候，默认会打开当前进程的三个文件：标准输入，标准输出，标准错误。本质都是文件，而stdin，stdout，stderr 是文件在语言层的表现。

标准输入对应的设备文件 → 键盘文件

标准输出对应的设备文件 → 显示器文件

标准错误对应的设备文件 → 显示器文件

打开文件后可以发现文件默认的文件描述符是3，那么0、1、2去哪了呢？

0、1、2分别对应标准输入、标准输出和标准错误，所以0、1、2被系统占用了。

文件描述符(open的返回值)的本质就是数组下标。

IO类read，write本质是拷贝函数，用户空间和内核空间的数据来回拷贝。

采用指针的方式对进程管理和文件管理两部分解耦合。

理解linux下一切皆文件。

使用OS的本质：通过进程的方式访问OS。

操作系统层面，必须要访问fd(文件描述符),我们才能找到文件。

FILE *fopen(const char *path,const char *mode);
//FILE是什么？   是C语言提供的一个结构体
//FILE结构体中必定封装了fd。

文件描述符分配规则

#include <stdio.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

#define LOG "log.txt"

/*
 * Demonstrates how file descriptors are handed out: descriptor 0 is closed
 * first, then LOG is opened six times and every returned descriptor is
 * printed. Each open() receives the smallest descriptor that is not in use,
 * so with 1 and 2 still open the output is expected to be 0, 3, 4, 5, 6, 7.
 *
 * Always returns 0; a failed open() shows up as -1 in the output.
 */
int main()
{
	close(0);   // free slot 0 so the first open() can reuse it

	int fds[6];
	const int count = (int)(sizeof(fds) / sizeof(fds[0]));

	// Open everything first, print afterwards, so printing cannot affect allocation.
	for(int i = 0;i < count;i++)
	{
		fds[i] = open(LOG, O_WRONLY | O_CREAT | O_TRUNC, 0666);
	}

	for(int i = 0;i < count;i++)
	{
		printf("%d\n", fds[i]);
	}

	// Release the descriptors that were actually obtained.
	for(int i = 0;i < count;i++)
	{
		if(fds[i] >= 0)
		{
			close(fds[i]);
		}
	}

	return 0;
}

进程中文件描述符分配规则：将在文件描述符表中，最小的没有被使用的数组元素，分配给新文件。

重定向原理

//输入重定向
#include <stdio.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

#define LOG "log.txt"

/*
 * Input redirection by hand: descriptor 0 is closed and LOG is opened in its
 * place, so scanf(), which reads from stdin, takes its input from LOG.
 *
 * Returns 0 on success, 1 if LOG cannot be opened or does not start with
 * two integers.
 */
int main()
{
  close(0);   // stdin is file descriptor 0

  // No mode argument: it is only used when O_CREAT is given.
  int fd1 = open(LOG,O_RDONLY);  // smallest free descriptor, expected to be 0
  if(fd1 < 0)
  {
    perror("open");
    return 1;
  }

  int a = 0,b = 0;
  // scanf reports how many items it converted; anything but 2 leaves a/b unset.
  if(scanf("%d%d",&a,&b) != 2)
  {
    fprintf(stderr,"failed to read two integers from %s\n",LOG);
    return 1;
  }
  printf("a = %d, b = %d",a,b);

  return 0;
}

//输出重定向
#include <stdio.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

#define LOG "log.txt"

/*
 * Output redirection by hand: descriptor 1 is closed and LOG is opened
 * (created or truncated) in its place, so everything printf() sends to
 * stdout ends up in LOG instead of on the display.
 *
 * Returns 0 on success, 1 if LOG cannot be opened.
 */
int main()
{
  close(1);   // stdout is file descriptor 1
  int fd1 = open(LOG,O_WRONLY | O_CREAT | O_TRUNC,0666);  // expected to reuse descriptor 1
  if(fd1 < 0)
  {
    perror("open");   // stderr (descriptor 2) is untouched, so this is still visible
    return 1;
  }

  // stdout -> 1, which now refers to LOG
  for(int i = 0;i < 7;i++)
  {
    printf("you can see me !\n");
  }

  return 0;
}

可以看见本应打印到屏幕上的字符串转而打印进了文件。

//追加重定向
#include <stdio.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

#define LOG "log.txt"

/*
 * Append redirection by hand: descriptor 1 is closed and LOG is opened with
 * O_APPEND in its place, so everything printf() sends to stdout is added to
 * the end of LOG and the existing content is kept.
 *
 * Returns 0 on success, 1 if LOG cannot be opened.
 */
int main()
{
  close(1);   // stdout is file descriptor 1
  int fd1 = open(LOG,O_WRONLY | O_CREAT | O_APPEND,0666);  // expected to reuse descriptor 1
  if(fd1 < 0)
  {
    perror("open");   // stderr (descriptor 2) is untouched, so this is still visible
    return 1;
  }

  // stdout -> 1, which now refers to LOG
  for(int i = 0;i < 7;i++)
  {
    printf("you can see me !\n");
  }

  return 0;
}

重定向的原理：在上层无法感知的情况下，在OS内部，更改进程对应的文件描述符表中特定下标的指向。

所以输出重定向无法将stderr，cerr打印进文件的原因是：

stdout，cout的文件描述符是1，向文件描述符1对应的文件打印。 stderr，cerr的文件描述符是2，向文件描述符2对应的文件打印。而输出重定向只改了文件描述符1的指向。

./a.out > log.txt 2>&1   //输出重定向+错误重定向
./a.out 1>log.txt 2>error.txt   //分别输出重定向，错误重定向

重定向的系统调用

#include <unistd.h>

int dup(int oldfd);
int dup2(int oldfd, int newfd);
//要保留的是oldfd，被覆盖的是newfd：调用后newfd和oldfd指向同一个被打开的文件

#include <stdio.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

#define LOG "log.txt"

/*
 * Redirection with dup2(): LOG is opened for appending and duplicated onto
 * descriptor 1, so the printf() output goes to LOG.
 *
 * Returns 0 on success, 1 if open() or dup2() fails.
 */
int main()
{
    int fd = open(LOG,O_WRONLY | O_CREAT | O_APPEND,0666);
    if(fd < 0)
    {
      perror("open error");
      return 1;
    }

    // If open() already returned descriptor 1 there is nothing to duplicate,
    // and closing fd would close stdout itself.
    if(fd != STDOUT_FILENO)
    {
      if(dup2(fd,STDOUT_FILENO) < 0)   // oldfd = fd is kept, newfd = 1 is overwritten
      {
        perror("dup2 error");
        close(fd);
        return 1;
      }
      close(fd);   // descriptor 1 keeps the file open; the extra descriptor is not needed
    }

    printf("hello world!");

    return 0;
}

缓冲区

1.C库的刷新策略

1、无缓冲 2、行缓冲：遇到"\n"时，会把"\n"及其之前的所有内容刷新到操作系统中 3、全缓冲：只有在把缓冲区写满时才会刷新(进程正常退出或调用fflush时也会刷新)。

显示器采用的刷新策略：行缓冲
普通文件采用的刷新策略：全缓冲

为什么要有缓冲区？

节省调用者时间。

缓冲区在哪里？

在进行fopen打开文件的时候，会得到FILE结构体，缓冲区就在这个FILE结构体中。

#include <stdio.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>

/*
 * Buffering experiment: prints one message through the C library and one
 * through the write() system call, then forks right before returning.
 * Run it once directly and once with stdout redirected to a file, and
 * compare how many times each message appears.
 *
 * Always returns 0 (in both the parent and the child).
 */
int main()
{
    // C library: the text goes into stdout's user-space buffer first. The
    // trailing newline matters, because a line-buffered stream flushes on it.
    fprintf(stdout, "hello fprintf\n");

    // System call: handed straight to the OS, no C library buffer involved.
    const char *msg = "hello write\n";
    write(1, msg, strlen(msg)); // strlen(), not +1: the '\0' is not file content

    // Return value deliberately unused: parent and child both simply return,
    // and each of them flushes whatever is still in its copy of the buffer.
    fork();
    return 0;
}

直接运行程序(输出到显示器)时，"hello fprintf"和"hello write"各打印一次。这个现象较容易理解，因为是打印到显示器的，而显示器的刷新策略是行缓冲，遇到"\n"就刷新缓冲区打印数据，所以fork()没有任何作用，因为缓冲区已经刷新过了。write是系统调用，不用考虑缓冲区的问题。

把输出重定向到文件后(例如 ./a.out > log.txt)，文件中"hello write"只有一条，而"hello fprintf"有两条。由于重定向到文件里了，这时刷新策略就变成了全缓冲，将数据写入缓冲区，而"hello fprintf"这一个数据不能将缓冲区写满，在fork()之前write已经将数据写入到操作系统了，而fprintf的数据还在缓冲区，fork()后，fprintf的数据属于父进程，但是父进程和子进程退出时都要进行刷新缓冲区，而刷新缓冲区的本质就是对缓冲区做清空(也就是修改)，所以刷新时，谁先刷新就会发生写时拷贝，父子进程各自刷新一份，所以文件中就有两条"hello fprintf"。