from sklearn.utils.extmath import randomized_svd
from scipy.sparse.linalg import svds
import numpy as np
# Toy 6 x 4 matrix; NaN marks an entry that has no value.
data = np.array(
    [
        [np.nan, 3, np.nan, 4],
        [3, np.nan, 4, np.nan],
        [np.nan, np.nan, 2, np.nan],
        [5, np.nan, 3, 4],
        [np.nan, np.nan, 4, np.nan],
        [np.nan, 3, 3, np.nan],
    ]
)
# Overwrite every NaN with 0 in place so the SVD routines receive finite input.
np.nan_to_num(data, copy=False, nan=0.0)

# 1) Randomized SVD (scikit-learn), requesting 4 components.
#    Shapes: u1 (6, 4), s1 (4,), v1 (4, 4).
#    Recorded output: s1 = array([9.16751646, 5.46594686, 4.61133842, 1.67798238])
u1, s1, v1 = randomized_svd(data, n_components=4)

# 2) Truncated SVD (SciPy), requesting k = 3 singular triplets.
#    Original note: the second argument k must be between 1 and min(data.shape).
#    Shapes: u2 (6, 3), s2 (3,), v2 (3, 4).
#    Recorded output: s2 = array([4.61133842, 5.46594686, 9.16751646])
#    (listed smallest-first in this recorded run, unlike s1 and s3).
u2, s2, v2 = svds(data, k=3)

# 3) Full SVD (NumPy).
#    Shapes: u3 (6, 6), s3 (4,), v3 (4, 4).
#    Recorded output: s3 = array([9.16751646, 5.46594686, 4.61133842, 1.67798238])
u3, s3, v3 = np.linalg.svd(data, full_matrices=True)
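# Besides the three approaches above, sklearn.decomposition also provides TruncatedSVD;
# see the scikit-learn manual for details. The example below is taken from that manual.
# Its usage differs slightly from the functions above.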
from sklearn.decomposition import TruncatedSVD
from scipy.sparse import csr_matrix
import numpy as np
# Seed NumPy's global RNG so the random 100 x 100 matrix is the same on every run.
np.random.seed(0)
X_dense = np.random.rand(100, 100)

# Zero out every even-indexed column (0, 2, ..., 98), then store the result
# in CSR sparse format.
X_dense[:, 0:100:2] = 0
X = csr_matrix(X_dense)

# Fit a 5-component truncated SVD on the sparse matrix.
svd = TruncatedSVD(n_components=5, n_iter=7, random_state=42).fit(X)

# Report the per-component explained-variance ratio, its total, and the
# singular values.
variance_ratio = svd.explained_variance_ratio_
print(variance_ratio)
print(variance_ratio.sum())
print(svd.singular_values_)