python random.sample

random.sample 从集合或者序列中，无放回采样

用法：

sample_list=random.sample(population, k)
# population : 可以是序列（list、tuple 或字符串），也可以是集合（注意：从 Python 3.9 起直接传入集合已被弃用，3.11 起会抛出 TypeError，需要先转换为序列，例如 sorted(s) 或 tuple(s)）
# k : 0<= k<= len(population) 
# 返回一个新的 list；如果 population 是字符串，返回的是随机选中的字符组成的列表（例如 ['8', '5']），而不是索引值
# 注意：不会改变原来的 population

测试：

import random

# Seed the global generator so the draws in this demo are repeatable.
random.seed(66)

# Case 1: sampling from a list.
a1 = list(range(10))
print(a1, type(a1))
b1 = random.sample(a1, k=2)
print(b1)
print(a1)  # printed again to show the source list is left untouched
# Output reported for this run:
# [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] <class 'list'>
# [1, 4]
# [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

# Case 2: sampling from a tuple -- the result is still a list.
a2 = tuple(range(10))
print(a2, type(a2))
b2 = random.sample(a2, k=2)
print(b2)
print(a2)  # printed again to show the tuple is unchanged
# Output reported for this run:
# (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) <class 'tuple'>
# [6, 3]
# (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)

# Case 3: sampling from a string -- the result is a list of characters.
a3 = "1234567890"
print(a3, type(a3))
b3 = random.sample(a3, k=2)
print(b3)
print(a3)  # printed again to show the string is unchanged
# Output reported for this run:
# 1234567890 <class 'str'>
# ['8', '5']
# 1234567890

# Case 4: a dict is a mapping, not a sequence, so random.sample() rejects
# it with TypeError.  Catch the error so the remaining demos still run,
# then sample from the keys by converting the dict to a list first.
a4 = dict(spam=1, egg=2, bar=3)
print(a4, type(a4))
try:
    b4 = random.sample(a4, 2)
except TypeError as exc:
    print("TypeError:", exc)
    # list(a4) yields the keys, which form a valid population.
    b4 = random.sample(list(a4), 2)
print(b4)
print(a4)  # printed again to show the dict is unchanged

# Case 5: sampling from a set.
# Passing a set directly was deprecated in Python 3.9 and raises TypeError
# from 3.11 on, so convert it to a tuple first.  Older versions performed
# the same tuple() conversion internally, so results there are unchanged.
a5 = {"Google", "Runoob", "Taobao"}
print(a5, type(a5))
b5 = random.sample(tuple(a5), 2)
print(b5)
print(a5)  # printed again to show the set is unchanged
# Example output (set iteration order can differ between runs):
# {'Runoob', 'Taobao', 'Google'} <class 'set'>
# ['Taobao', 'Runoob']
# {'Runoob', 'Taobao', 'Google'}

源代码：

    def sample(self, population, k):
        """Return a new list of k elements drawn without replacement.

        ``population`` may be a sequence or a set and is never modified.
        Elements appear in the order they were drawn, so any slice of the
        result is itself a valid random sample (for instance, the leading
        entries can be the grand-prize winners and the rest runners-up).

        Selection works on positions rather than values: in a sequence a
        value that occurs several times can be picked once per occurrence,
        and sequence elements do not have to be hashable.

        Passing a ``range`` is a cheap way to sample integers from a large
        interval, e.g. ``sample(range(10000000), 60)``.

        Raises:
            TypeError: if population is neither a sequence nor a set.
            ValueError: if k is negative or larger than len(population).
        """
        # A set cannot be indexed, so freeze it into a tuple up front.
        if isinstance(population, _Set):
            population = tuple(population)
        if not isinstance(population, _Sequence):
            raise TypeError("Population must be a sequence or set.  For dicts, use list(d).")

        draw = self._randbelow
        size = len(population)
        if not (0 <= k <= size):
            raise ValueError("Sample larger than population or is negative")

        picks = [None] * k

        # Two bookkeeping strategies are available: remember the indices
        # already taken in a set, or copy the population into a list and
        # shrink the candidate region as items are drawn.  The threshold
        # below estimates when the copied list is the smaller structure.
        threshold = 21  # size of a small set minus size of an empty list
        if k > 5:
            threshold += 4 ** _ceil(_log(k * 3, 4))  # table size for big sets

        if size > threshold:
            # Few picks relative to the population: record chosen indices
            # and simply redraw whenever an index comes up a second time.
            taken = set()
            for slot in range(k):
                idx = draw(size)
                while idx in taken:
                    idx = draw(size)
                taken.add(idx)
                picks[slot] = population[idx]
            return picks

        # An n-length list is smaller than a k-length set: draw from a
        # private copy whose not-yet-chosen items occupy [0, remaining).
        pool = list(population)
        for slot in range(k):
            remaining = size - slot
            idx = draw(remaining)
            picks[slot] = pool[idx]
            # Refill the hole with the last unchosen item.
            pool[idx] = pool[remaining - 1]
        return picks

python random.sample

random.sample 从集合或者序列中，无放回采样

悦读