random.sample 从集合或者序列中,无放回采样
用法:
sample_list=random.sample(population, k)
# population : 可以是序列(list、tuple 或字符串);旧版本也接受集合(set),但从 Python 3.9 起已弃用、3.11 起会抛出 TypeError,需先转换为序列,例如 sorted(s)
# k : 0<= k<= len(population)
# 返回一个list;如果 population 是字符串,返回的是由随机选中的字符组成的列表(例如 ['8', '5']),而不是索引值
# 注意:不会改变原来的 population
测试:
import random

# Fix the seed so the sampled values below can be reproduced.
random.seed(66)

# Sampling from a list: the result is a new list and a1 is left untouched.
a1 = list(range(10))
print(a1, type(a1))
b1 = random.sample(a1, k=2)
print(b1)
print(a1)
# Recorded output:
#   [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] <class 'list'>
#   [1, 4]
#   [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
# Sampling from a tuple: the result is still a list, and a2 is unchanged.
a2 = tuple(range(10))
print(a2, type(a2))
b2 = random.sample(a2, k=2)
print(b2)
print(a2)
# Recorded output:
#   (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) <class 'tuple'>
#   [6, 3]
#   (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)
# Sampling from a string: the result is a list of single characters,
# and a3 is unchanged.
a3 = "1234567890"
print(a3, type(a3))
b3 = random.sample(a3, k=2)
print(b3)
print(a3)
# Recorded output:
#   1234567890 <class 'str'>
#   ['8', '5']
#   1234567890
# Sampling from a dict: random.sample() rejects a dict with TypeError.
# The error is caught so that it is shown without aborting the rest of the
# script; the keys are then sampled by converting the dict to a list first.
a4 = dict(spam=1, egg=2, bar=3)
print(a4, type(a4))
try:
    b4 = random.sample(a4, 2)
except TypeError as exc:
    print("TypeError:", exc)
    # list(a4) yields the keys in insertion order, which is a valid sequence.
    b4 = random.sample(list(a4), 2)
print(b4)
print(a4)
# Sampling from a set: passing a set directly was deprecated in Python 3.9
# and raises TypeError from 3.11 on, so convert it to a sequence first.
# sorted() is used rather than list() because set iteration order is not
# stable, and a fixed order keeps the draw reproducible under a fixed seed.
a5 = {"Google", "Runoob", "Taobao"}
print(a5, type(a5))
b5 = random.sample(sorted(a5), 2)
print(b5)
print(a5)
# Output shape: the set (display order may vary) with <class 'set'>, then a
# list of two distinct names from the set, then the unchanged set again.
源代码:
def sample(self, population, k):
    """Choose k unique random elements from a population sequence or set.

    Returns a new list containing elements from the population while
    leaving the original population unchanged.  The resulting list is
    in selection order so that all sub-slices will also be valid random
    samples.  This allows raffle winners (the sample) to be partitioned
    into grand prize and second place winners (the subslices).

    Members of the population need not be hashable or unique.  If the
    population contains repeats, then each occurrence is a possible
    selection in the sample.

    To choose a sample in a range of integers, use range as an argument.
    This is especially fast and space efficient for sampling from a
    large population:   sample(range(10000000), 60)

    Args:
        population: A sequence or a set.  A set is copied into a tuple
            before sampling.
        k: Number of elements to select; must satisfy
            0 <= k <= len(population).

    Returns:
        A new list of k elements taken from distinct positions of the
        population, in selection order.

    Raises:
        TypeError: If population is neither a set nor a sequence
            (for example a dict).
        ValueError: If k is negative or larger than the population.
    """
    # NOTE(review): _Set, _Sequence, _ceil and _log are not defined in this
    # excerpt; presumably they are the enclosing module's aliases for
    # collections.abc.Set / Sequence and math.ceil / math.log -- confirm.

    # Sampling without replacement entails tracking either potential
    # selections (the pool) in a list or previous selections in a set.
    # When the number of selections is small compared to the
    # population, then tracking selections is efficient, requiring
    # only a small set and an occasional reselection.  For
    # a larger number of selections, the pool tracking method is
    # preferred since the list takes less space than the
    # set and it doesn't suffer from frequent reselections.

    if isinstance(population, _Set):
        # Sets cannot be indexed, so sample from a positional copy.
        population = tuple(population)
    if not isinstance(population, _Sequence):
        raise TypeError("Population must be a sequence or set. For dicts, use list(d).")
    randbelow = self._randbelow
    n = len(population)
    if not 0 <= k <= n:
        raise ValueError("Sample larger than population or is negative")
    result = [None] * k
    setsize = 21        # size of a small set minus size of an empty list
    if k > 5:
        setsize += 4 ** _ceil(_log(k * 3, 4))  # table size for big sets
    if n <= setsize:
        # An n-length list is smaller than a k-length set
        pool = list(population)
        for i in range(k):          # invariant:  non-selected at [0,n-i)
            j = randbelow(n - i)
            result[i] = pool[j]
            pool[j] = pool[n - i - 1]   # move non-selected item into vacancy
    else:
        # Track the indices already chosen and redraw on a collision.
        selected = set()
        selected_add = selected.add
        for i in range(k):
            j = randbelow(n)
            while j in selected:
                j = randbelow(n)
            selected_add(j)
            result[i] = population[j]
    return result