Bootstrap

【C++】string底层的实现原理(简单详细)

前言

本篇文章我将按照C++文档库中的模块顺序来实现string并讲解其实现原理,每个板块中只讲最常用的接口

目录

一,Member functions(成员函数)

二、Iterators(迭代器)

三、Capacity(容量)

  1. 常见容量接口的实现
  2. 重点容量接口代码思想剖析

    四、Element access(成员访问)

    五、Modifiers(修改器)

    1. 常见修改器的实现
    2. 重点修改器代码思想剖析

      六、String operations(字符串操作)
      七、Non-member function overloads(非成员函数重载)
      八、整合版

      一、Member functions(成员函数)
      namespace L
      {
      	// Teaching version of a string class: owns a heap buffer of
      	// _capacity + 1 bytes and keeps _str[_size] == '\0' at all times.
      	class string
      	{
      	public:
      		// Construct from a C string (default: empty string).
      		// A null pointer is treated as "" instead of crashing in strlen.
      		string(const char* str="")
      		{
      			if (str == nullptr)
      			{
      				str = "";
      			}
      			_size = strlen(str);
      			_capacity = _size;
      			_str = new char[_capacity + 1];// +1 leaves room for the terminator
      			memcpy(_str, str, _size + 1);// copies the characters and the '\0'
      		}
      		// Copy constructor: deep copy, so the two objects never share a buffer.
      		string(const string& str)
      			:_str(new char[str._capacity + 1])
      			, _size(str._size)
      			, _capacity(str._capacity)
      		{
      			memcpy(_str, str._str, _size + 1);
      		}
      		// Copy assignment. Without it the compiler-generated version copies the
      		// pointer only, which leaks the old buffer and frees the shared one twice.
      		string& operator=(const string& str)
      		{
      			if (this != &str)// self-assignment must not free our own data
      			{
      				// Allocate before releasing: if new throws, *this is untouched.
      				char* temp = new char[str._capacity + 1];
      				memcpy(temp, str._str, str._size + 1);
      				delete[] _str;
      				_str = temp;
      				_size = str._size;
      				_capacity = str._capacity;
      			}
      			return *this;
      		}
      		// Destructor: release the buffer and reset the bookkeeping fields.
      		~string()
      		{
      			delete[] _str;
      			_str = nullptr;
      			_size = _capacity = 0;
      		}
      	private:
      		char* _str;// pointer to the character buffer
      		size_t _size;// number of valid characters (terminator excluded)
      		size_t _capacity;// characters the buffer can hold (terminator excluded)
      	};
      }
      
      二、Iterators(迭代器)
      namespace L
      {
      	// Iterator part of the class: iterators are plain character pointers.
      	class string
      	{
      	public:
      		using iterator = char*;
      		using const_iterator = const char*;

      		// Mutable range: begin() is the first character, end() is one past
      		// the last valid character.
      		iterator begin() { return _str; }
      		iterator end() { return _str + _size; }

      		// Read-only range with the same bounds, selected on const objects.
      		const_iterator begin() const { return _str; }
      		const_iterator end() const { return _str + _size; }
      	private:
      		char* _str;// pointer to the character buffer
      		size_t _size;// number of valid characters
      		size_t _capacity;// capacity of the buffer
      	};
      }
      

      string的迭代器我这里是用指针来实现的,迭代器主要就是通过begin(),end()来遍历字符串,由于我们实现了begin()和end()两个成员函数,所以我们在遍历的时候可以使用范围for来遍历字符串,范围for的底层就是替换成了迭代器

      三、Capacity(容量)

      1.常见容量接口的实现

      namespace L
      {
      	// Capacity part of the class. Invariant maintained by every function
      	// here: _str[_size] == '\0' and _size <= _capacity.
      	class string
      	{
      	public:
      		// Number of valid characters.
      		size_t size() const
      		{
      			return _size;
      		}
      		// Number of characters the buffer can hold without reallocating.
      		size_t capacity() const
      		{
      			return _capacity;
      		}
      		// True when the string holds no characters.
      		bool empty() const
      		{
      			return _size == 0;
      		}
      		// Drop all characters; the buffer (capacity) is kept.
      		void clear()
      		{
      			_size = 0;
      			_str[_size] = '\0';
      		}
      		// Set the length to n. Shrinking truncates; growing appends copies of
      		// ch (reallocating first if n exceeds the capacity).
      		void resize(size_t n, char ch='\0')
      		{
      			if (n > _size)
      			{
      				reserve(n);
      				for (size_t i = _size; i < n; i++)
      				{
      					_str[i] = ch;
      				}
      			}
      			// Both paths must re-terminate; the growing path used to skip this
      			// and left the string without a '\0' after the new characters.
      			_str[n] = '\0';
      			_size = n;
      		}
      		// Grow the buffer to hold at least n characters. Never shrinks.
      		void reserve(size_t n)
      		{
      			if (n > _capacity)
      			{
      				char* temp = new char[n+1];// +1 for the terminator
      				if (_str != nullptr)
      				{
      					// Copy by length, not strcpy: resize() may have stored
      					// '\0' characters inside the valid range.
      					memcpy(temp, _str, _size + 1);
      				}
      				else
      				{
      					// This excerpt has no constructor, so _str may still be null.
      					temp[0] = '\0';
      				}
      				delete[] _str;
      				_str = temp;
      				_capacity = n;
      			}
      		}
      	private:
      		char* _str;
      		size_t _size;
      		size_t _capacity;
      	};
      }
      

      2.重点代码思想剖析

      这个板块我们重点讲解两个成员函数,resize()和reserve()
      1、resize(size_t n, char ch='\0');
      ①功能描述:当n<=size(字符串的长度)时,只保留字符串的前n个字符;当n>size时,在字符串末尾追加字符ch,直到长度为n,即从size位置到n-1位置的值都为ch(只有当n>capacity时才会引发扩容);
      ②实现思想:当n<=size时,直接给n位置赋值成 ‘\0’(因为字符串的结束标识是\0),然后将有效字符个数改为n即可;当n>size时,从字符串的末尾开始添加字符直到size=n为止

      2、void reserve(size_t n);
      ①功能描述:reserve 主要是扩容,避免capacity多次扩容,影响效率;n<size时:不会缩容,也不会扩容;n>capacity时:会扩容
      ②实现思想:当n>capacity时,使用new动态开辟出一块大小为n+1的空间,多开1个空间是用来存放\0的,使用strcpy将原来的数据拷贝到新开的空间中,然后释放掉旧空间

      四、Element access(成员访问)
      namespace L
      {
      	// Element access part of the class.
      	class string
      	{
      	public:
      		// Writable access to the character at index n.
      		// n == _size is allowed and refers to the terminating '\0'.
      		char& operator[](size_t n)
      		{
      			assert(n <= _size);
      			return _str[n];
      		}
      		// Read-only access for const strings. The previous single const
      		// overload returned char&, which let callers modify a const object.
      		const char& operator[](size_t n) const
      		{
      			assert(n <= _size);
      			return _str[n];
      		}
      	private:
      		char* _str;
      		size_t _size;
      		size_t _capacity;
      	};
      }
      
      五、Modifiers(修改器)
      1、 常见修改器的实现
      namespace L
      {
      	// Modifier part of the class. Every function keeps _str[_size] == '\0'.
      	class string
      	{
      	public:
      		// Exchange contents with s by swapping the three members; no copying.
      		void swap(string& s) 
      		{
      			std::swap(_str, s._str);
      			std::swap(_size, s._size);
      			std::swap(_capacity, s._capacity);
      		}
      		// Remove len characters starting at pos; npos (or any len that reaches
      		// past the end) removes everything from pos onwards.
      		void erase(size_t pos = 0, size_t len = npos)
      		{
      			assert(pos <= _size);
      			if (len == npos || len >= _size - pos)
      			{
      				_str[pos] = '\0';
      				_size = pos;// exactly pos characters remain (was pos + 1)
      			}
      			else
      			{
      				// Source and destination overlap, so strcpy is undefined here;
      				// memmove is the overlap-safe copy. +1 moves the terminator too.
      				memmove(_str + pos, _str + pos + len, _size - pos - len + 1);
      				_size -= len;
      			}
      		}
      		// Append one character, doubling the capacity when the buffer is full.
      		void push_back(char ch)
      		{
      			if (_size == _capacity)
      			{
      				reserve(_capacity == 0 ? 4 : 2 * _capacity);
      			}
      			_str[_size] = ch;
      			_size++;
      			_str[_size] = '\0';
      		}
      		// Append a C string, growing to exactly the required size if needed.
      		void append(const char* str)
      		{
      			size_t len = strlen(str);
      			if (len + _size > _capacity)
      			{
      				reserve(len + _size);
      			}
      			memcpy(_str + _size, str, len + 1);// +1 copies the terminator
      			_size += len;
      		}
      		// Insert one character before position pos (pos == _size appends).
      		void insert(size_t pos, char ch)
      		{
      			assert(pos <= _size);
      			if (_size == _capacity)
      			{
      				reserve(_capacity == 0 ? 4 : 2 * _capacity);
      			}
      			// Shift the tail, terminator included, one slot to the right.
      			memmove(_str + pos + 1, _str + pos, _size - pos + 1);
      			_str[pos] = ch;
      			_size++;
      		}
      		// Insert a C string before position pos (pos == _size appends).
      		void insert(size_t pos,const char* str)
      		{
      			assert(pos <= _size);
      			size_t len = strlen(str);
      			if (len + _size > _capacity)
      			{
      				reserve(len + _size);
      			}
      			// Shift the tail, terminator included, len slots to the right.
      			// memmove avoids both the unsigned wrap-around of a hand-written
      			// backwards loop and the int narrowing used to work around it.
      			memmove(_str + pos + len, _str + pos, _size - pos + 1);
      			// Exactly len characters: no terminator may land mid-string.
      			strncpy(_str + pos, str, len);
      			_size += len;
      		}
      
      		// s += 'c' is push_back.
      		string& operator+=(const char ch)
      		{
      			push_back(ch);
      			return *this;
      		}
      		// s += "text" is append.
      		string& operator+=(const char* str)
      		{
      			append(str);
      			return *this;
      		}
      	private:
      		char* _str;
      		size_t _size;
      		size_t _capacity;
      	};
      }
      

      2、重点修改器的代码剖析
      void insert(size_t pos,const char* str);
      ①功能描述:往字符串中的任意位置插入一个字符串
      ②实现思想:先判断插入进来的字符串的长度+现有的有效字符个数会不会超过该字符串的容量,超过了就扩容,然后从pos位置开始,将其后的字符全部挪动len个字符,然后再使用strncpy拷贝这个字符串到其要插入的位置

      tips:容易犯错的点

      	// Deliberately WRONG version, shown as a common mistake: do not copy it.
      	// It is meant to insert the C string str before position pos, but all of
      	// its index arithmetic is done in the unsigned type size_t.
      	void insert(size_t pos,const char* str)
      	{
      		assert(pos <= _size);
      		size_t len = strlen(str);
      		if (len + _size > _capacity)
      		{
      			reserve(len + _size);
      		}
      		size_t end = _size + len;
      		// BUG: end - len is unsigned and can never be negative. With pos == 0
      		// the condition is always true: once end drops below len the
      		// subtraction wraps to a huge value, so the loop never stops and
      		// reads and writes outside the buffer.
      		while (pos <= end - len)
      		{
      			_str[end] = _str[end - len];
      			end--;
      		}
      		strncpy(_str + pos, str, len);
      		_size += len;
      	}
      

      上面代码出现的问题,如下图解释

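      (原图缺失,这里用文字说明)问题出在 end 和 len 都是 size_t(无符号)类型:当 pos 为 0 时,end 减到 len-1 之后,end - len 不会变成 -1,而是回绕成一个极大的无符号数,循环条件 pos <= end - len 永远成立,于是死循环并越界访问。解决办法是用有符号类型(例如 int)来做比较,或者改用不会发生下溢的写法(例如用 memmove 整体挪动)。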

      六、String operations(字符串操作)
      namespace L
      {
      	// String operations part of the class.
      	class string
      	{
      	public:
      		// Return a new string holding up to len characters starting at pos.
      		// npos (or any len that reaches past the end) means "to the end".
      		// The source string is never modified.
      		string substr(size_t pos = 0, size_t len = npos) const
      		{
      			assert(pos <= _size);
      			size_t count = _size - pos;// characters available from pos
      			if (len != npos && len < count)
      			{
      				count = len;
      			}
      			string temp;
      			// += grows temp as needed; copying straight into a fresh string's
      			// buffer would overflow it.
      			for (size_t i = pos; i < pos + count; i++)
      			{
      				temp += _str[i];
      			}
      			return temp;
      		}
      		// Index of the first c at or after pos, or npos when there is none
      		// (including when pos is past the end).
      		size_t find(char c, size_t pos = 0) const
      		{
      			for (size_t i = pos; i < _size; i++)
      			{
      				if (_str[i] == c)
      				{
      					return i;
      				}
      			}
      			return npos;
      		}
      		// Index of the first occurrence of the C string s at or after pos, or
      		// npos. Uses strstr, so the search stops at the first '\0' in the buffer.
      		size_t find(const char* s, size_t pos = 0) const
      		{
      			if (pos > _size)
      			{
      				return npos;
      			}
      			const char* p = strstr(_str + pos, s);
      			if (p)
      			{
      				return p - _str;// pointer difference = offset from the start
      			}
      			else
      			{
      				return npos;
      			}
      		}
      	private:
      		char* _str;
      		size_t _size;
      		size_t _capacity;
      	};
      }
      
      七、Non-member function overloads(非成员函数重载)
      namespace L
      {
      	class string
      	{
      	public:
      		
      	private:
      		char* _str;
      		size_t _size;
      		size_t _capacity;
      	};
      	ostream& operator<<(ostream& out, const string& str)
      	{
      		for (size_t i = 0; i <str.size(); i++)
      		{
      			out << str[i];
      		}
      		return out;
      	}
      	istream& operator>>(istream& in, string& str)
      	{
      		char ch;
      		ch = in.get();//get函数用于从输入流上读取一个字符
      		while (ch != ' ' && ch != '\n')
      		{
      			str += ch;
      			ch = in.get();
      		}
      		return in;
      	}
      	void swap(string& s1, string& s2)
      	{
      		s1.swap(s2);
      	}
      }
      

      流插入和流提取为什么要实现在类外?

      因为在类里面首个参数会被隐含的this指针占据,在调用的时候会很别扭,比如str<<cout;所以为了避免这种情况,我们还是写在类外面

      八、整合版

      #pragma once
      #include<assert.h>
      #include<cstring>
      #include<iostream>
      using namespace std;
      
      namespace L
      {
      	// Teaching re-implementation of a std::string-like class.
      	// The object owns a heap buffer of _capacity + 1 bytes and every member
      	// function keeps _str[_size] == '\0'.
      	class string
      	{
      	public:
      		typedef char* iterator;
      		typedef const char* const_iterator;

      		// "To the end" / "not found" sentinel: the largest size_t value.
      		// Initialised in the class so the header needs no separate definition.
      		static const size_t npos = static_cast<size_t>(-1);
      		
      		// Iterators are plain pointers; [begin(), end()) covers the valid
      		// characters, which is all range-for needs.
      		iterator begin()
      		{
      			return _str;
      		}
      		iterator end()
      		{
      			return _str + _size;
      		}
      		const_iterator begin() const
      		{
      			return _str;
      		}
      		const_iterator end() const
      		{
      			return _str + _size;
      		}
      		// Construct from a C string (default: empty). A null pointer is
      		// treated as "" instead of crashing in strlen.
      		string(const char* str="")
      		{
      			if (str == nullptr)
      			{
      				str = "";
      			}
      			_size = strlen(str);
      			_capacity = _size;
      			_str = new char[_capacity + 1];// +1 for the terminator
      			memcpy(_str, str, _size + 1);
      		}
      		// Copy constructor, e.g. s1(s2): deep copy of the buffer.
      		string(const string& str)
      			:_str(new char[str._capacity + 1])
      			, _size(str._size)
      			, _capacity(str._capacity)
      		{
      			// Copy by length so '\0' characters inside the string survive.
      			memcpy(_str, str._str, _size + 1);
      		}
      		// Copy assignment (copy-and-swap). Without it the compiler-generated
      		// version copies the pointer only: a leak plus a double delete.
      		string& operator=(const string& str)
      		{
      			if (this != &str)
      			{
      				string temp(str);
      				swap(temp);// temp's destructor releases our old buffer
      			}
      			return *this;
      		}
      		~string()
      		{
      			delete[] _str;
      			_str = nullptr;
      			_size = _capacity = 0;
      		}
      
      		// Grow the buffer to hold at least n characters. Never shrinks.
      		void reserve(size_t n)
      		{
      			if (n > _capacity)
      			{
      				char* temp = new char[n+1];
      				// Copy by length, not strcpy: resize() can put '\0'
      				// characters inside the valid range.
      				memcpy(temp, _str, _size + 1);
      				delete[] _str;
      				_str = temp;
      				_capacity = n;
      			}
      		}
      		// Append one character, doubling the capacity when the buffer is full.
      		void push_back(char ch)
      		{
      			if (_size == _capacity)
      			{
      				reserve(_capacity == 0 ? 4 : 2 * _capacity);
      			}
      			_str[_size] = ch;
      			_size++;
      			_str[_size] = '\0';
      		}
      		// Append a C string.
      		void append(const char* str)
      		{
      			size_t len = strlen(str);
      			if (len + _size > _capacity)
      			{
      				reserve(len + _size);
      			}
      			memcpy(_str + _size, str, len + 1);// +1 copies the terminator
      			_size += len;
      		}
      		// Insert one character before position pos (pos == _size appends).
      		void insert(size_t pos, char ch)
      		{
      			assert(pos <= _size);
      			if (_size == _capacity)
      			{
      				reserve(_capacity == 0 ? 4 : 2 * _capacity);
      			}
      			// Shift the tail, terminator included, one slot to the right.
      			memmove(_str + pos + 1, _str + pos, _size - pos + 1);
      			_str[pos] = ch;
      			_size++;
      		}
      		// Insert a C string before position pos (pos == _size appends).
      		void insert(size_t pos,const char* str)
      		{
      			assert(pos <= _size);
      			size_t len = strlen(str);
      			if (len + _size > _capacity)
      			{
      				reserve(len + _size);
      			}
      			// Shift the tail, terminator included, len slots to the right.
      			// memmove avoids the unsigned wrap-around of a backwards loop.
      			memmove(_str + pos + len, _str + pos, _size - pos + 1);
      			// Exactly len characters: no terminator may land mid-string.
      			strncpy(_str + pos, str, len);
      			_size += len;
      		}
      
      		string& operator+=(const char ch)
      		{
      			push_back(ch);
      			return *this;
      		}
      		string& operator+=(const char* str)
      		{
      			append(str);
      			return *this;
      		}
      		size_t size() const
      		{
      			return _size;
      		}
      		size_t capacity() const
      		{
      			return _capacity;
      		}
      		bool empty() const
      		{
      			return _size == 0;
      		}
      		// Drop all characters; the capacity is kept.
      		void clear()
      		{
      			_size = 0;
      			_str[_size] = '\0';
      		}
      		// Exchange contents with s by swapping the three members.
      		void swap(string& s) 
      		{
      			std::swap(_str, s._str);
      			std::swap(_size, s._size);
      			std::swap(_capacity, s._capacity);
      		}
      		// Writable element access; n == _size refers to the terminator.
      		char& operator[](size_t n)
      		{
      			assert(n <= _size);
      			return _str[n];
      		}
      		// Read-only element access, so a const string cannot be modified.
      		const char& operator[](size_t n) const
      		{
      			assert(n <= _size);
      			return _str[n];
      		}
      		// Set the length to n. Shrinking truncates; growing appends copies
      		// of ch (reallocating first if n exceeds the capacity).
      		void resize(size_t n, char ch='\0')
      		{
      			if (n > _size)
      			{
      				reserve(n);
      				for (size_t i = _size; i < n; i++)
      				{
      					_str[i] = ch;
      				}
      			}
      			// Both paths re-terminate; the growing path used to skip this.
      			_str[n] = '\0';
      			_size = n;
      		}
      
      		// Index of the first c at or after pos, or npos when there is none
      		// (including when pos is past the end).
      		size_t find(char c, size_t pos = 0) const
      		{
      			for (size_t i = pos; i < _size; i++)
      			{
      				if (_str[i] == c)
      				{
      					return i;
      				}
      			}
      			return npos;
      		}
      		// Index of the first occurrence of the C string s at or after pos, or
      		// npos. Uses strstr, so the search stops at the first '\0' in the buffer.
      		size_t find(const char* s, size_t pos = 0) const
      		{
      			if (pos > _size)
      			{
      				return npos;
      			}
      			const char* p = strstr(_str + pos, s);
      			if (p)
      			{
      				return p - _str;// pointer difference = offset from the start
      			}
      			else
      			{
      				return npos;
      			}
      		}
      		// Remove len characters starting at pos; npos (or any len that
      		// reaches past the end) removes everything from pos onwards.
      		void erase(size_t pos = 0, size_t len = npos)
      		{
      			assert(pos <= _size);
      			if (len == npos || len >= _size - pos)
      			{
      				_str[pos] = '\0';
      				_size = pos;// exactly pos characters remain (was pos + 1)
      			}
      			else
      			{
      				// The ranges overlap, so strcpy would be undefined here.
      				memmove(_str + pos, _str + pos + len, _size - pos - len + 1);
      				_size -= len;
      			}
      		}
      
      		// Return a new string holding up to len characters starting at pos.
      		// The source string is never modified.
      		string substr(size_t pos = 0, size_t len = npos) const
      		{
      			assert(pos <= _size);
      			size_t count = _size - pos;// characters available from pos
      			if (len != npos && len < count)
      			{
      				count = len;
      			}
      			string temp;
      			temp.reserve(count);// a fresh string holds only the terminator
      			memcpy(temp._str, _str + pos, count);
      			temp._str[count] = '\0';
      			temp._size = count;
      			return temp;
      		}
      	private:
      		char* _str;// pointer to the character buffer
      		size_t _size;// number of valid characters (terminator excluded)
      		size_t _capacity;// characters the buffer can hold (terminator excluded)
      	};
      
      	// The free functions are inline because this is a header: a non-inline
      	// definition would be duplicated in every translation unit including it.

      	// Stream insertion: writes the size() valid characters one by one.
      	inline std::ostream& operator<<(std::ostream& out, const string& str)
      	{
      		for (size_t i = 0; i <str.size(); i++)
      		{
      			out << str[i];
      		}
      		return out;
      	}
      	// Stream extraction: reads one token delimited by ' ' or '\n'.
      	// Leading delimiters are skipped, the previous contents of str are
      	// replaced, and end of input ends the token instead of looping forever.
      	inline std::istream& operator>>(std::istream& in, string& str)
      	{
      		string token;
      		bool got = false;// becomes true once the token has a character
      		char ch;
      		while (in.get(ch))// get(char&) fails at EOF, which ends the loop
      		{
      			if (ch == ' ' || ch == '\n')
      			{
      				if (got)
      				{
      					break;// delimiter after the token: done
      				}
      				continue;// delimiter before the token: skip it
      			}
      			token += ch;
      			got = true;
      		}
      		if (got && in.eof() && !in.bad())
      		{
      			// The token was read successfully and merely ended at EOF:
      			// keep eofbit but drop the failbit that get() set.
      			in.clear(std::ios_base::eofbit);
      		}
      		str.swap(token);// replace rather than append to the old contents
      		return in;
      	}
      	// Non-member swap forwarding to the member function.
      	inline void swap(string& s1, string& s2)
      	{
      		s1.swap(s2);
      	}
      }
      
      ;