Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# ------------------------------------------------------------------------------ 

2# Copyright (C) 2020 Maximilian Stahlberg 

3# 

4# This file is part of PICOS. 

5# 

6# PICOS is free software: you can redistribute it and/or modify it under the 

7# terms of the GNU General Public License as published by the Free Software 

8# Foundation, either version 3 of the License, or (at your option) any later 

9# version. 

10# 

11# PICOS is distributed in the hope that it will be useful, but WITHOUT ANY 

12# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 

13# A PARTICULAR PURPOSE. See the GNU General Public License for more details. 

14# 

15# You should have received a copy of the GNU General Public License along with 

16# this program. If not, see <http://www.gnu.org/licenses/>. 

17# ------------------------------------------------------------------------------ 

18 

19"""Implements :class:`Samples`.""" 

20 

21import cvxopt 

22 

23from .. import glyphs 

24from ..apidoc import api_end, api_start 

25from ..caching import cached_property 

26from .data import cvxopt_hcat, load_data, load_shape 

27from .exp_affine import ComplexAffineExpression, Constant 

28from .expression import Expression 

29 

30_API_START = api_start(globals()) 

31# ------------------------------- 

32 

33 

34class Samples(): 

35 """A collection of data points. 

36 

37 :Example: 

38 

39 >>> from picos.expressions import Samples 

40 >>> # Load the column-major vectorization of six matrices. 

41 >>> data = [[[1*i, 3*i], 

42 ... [2*i, 4*i]] for i in range(1, 7)] 

43 >>> S = Samples(data) 

44 >>> S 

45 <Samples: (6 4-dimensional samples)> 

46 >>> [S.num, S.dim, S.original_shape] # Metadata. 

47 [6, 4, (2, 2)] 

48 >>> S.matrix # All samples as the columns of one matrix. 

49 <4×6 Real Constant: [4×6]> 

50 >>> print(S.matrix) 

51 [ 1.00e+00 2.00e+00 3.00e+00 4.00e+00 5.00e+00 6.00e+00] 

52 [ 2.00e+00 4.00e+00 6.00e+00 8.00e+00 1.00e+01 1.20e+01] 

53 [ 3.00e+00 6.00e+00 9.00e+00 1.20e+01 1.50e+01 1.80e+01] 

54 [ 4.00e+00 8.00e+00 1.20e+01 1.60e+01 2.00e+01 2.40e+01] 

55 >>> print(S[0].T) # The first sample (transposed for brevity). 

56 [ 1.00e+00 2.00e+00 3.00e+00 4.00e+00] 

57 >>> print(S.mean.T) # The sample mean (transposed for brevity). 

58 [ 3.50e+00 7.00e+00 1.05e+01 1.40e+01] 

59 >>> print(S.covariance) # The sample covariance matrix. 

60 [ 3.50e+00 7.00e+00 1.05e+01 1.40e+01] 

61 [ 7.00e+00 1.40e+01 2.10e+01 2.80e+01] 

62 [ 1.05e+01 2.10e+01 3.15e+01 4.20e+01] 

63 [ 1.40e+01 2.80e+01 4.20e+01 5.60e+01] 

64 >>> print(S.original[0]) # The first sample in its original shape. 

65 [ 1.00e+00 3.00e+00] 

66 [ 2.00e+00 4.00e+00] 

67 >>> U = S.select([0, 2, 4]) # Select a subset of samples by indices. 

68 >>> print(U.matrix) 

69 [ 1.00e+00 3.00e+00 5.00e+00] 

70 [ 2.00e+00 6.00e+00 1.00e+01] 

71 [ 3.00e+00 9.00e+00 1.50e+01] 

72 [ 4.00e+00 1.20e+01 2.00e+01] 

73 >>> T, V = S.partition() # Split into training and validation samples. 

74 >>> print(T.matrix) 

75 [ 1.00e+00 2.00e+00 3.00e+00] 

76 [ 2.00e+00 4.00e+00 6.00e+00] 

77 [ 3.00e+00 6.00e+00 9.00e+00] 

78 [ 4.00e+00 8.00e+00 1.20e+01] 

79 >>> print(V.matrix) 

80 [ 4.00e+00 5.00e+00 6.00e+00] 

81 [ 8.00e+00 1.00e+01 1.20e+01] 

82 [ 1.20e+01 1.50e+01 1.80e+01] 

83 [ 1.60e+01 2.00e+01 2.40e+01] 

84 """ 

85 

86 def __new__(cls, samples=None, forced_original_shape=None, **kwargs): 

87 """Prepare a :class:`Samples` instance.""" 

88 if isinstance(samples, cls): 

89 if forced_original_shape is not None: 

90 forced_shape = load_shape(forced_original_shape) 

91 if forced_shape[0]*forced_shape[1] != samples.dim: 

92 raise ValueError("Incompatible forced original shape.") 

93 

94 if forced_shape == samples.original_shape: 

95 # Shapes are consistent, return the existing instance. 

96 return samples 

97 else: 

98 # Make a shallow copy and change only the original shape. 

99 self = object.__new__(cls) 

100 self._cached_cvx_mat = samples._cached_cvx_mat 

101 self._cached_cvx_vec = samples._cached_cvx_vec 

102 self._cached_pic_mat = samples._cached_pic_mat 

103 self._cached_pic_vec = samples._cached_pic_vec 

104 self._original_shape = forced_shape 

105 return self 

106 else: 

107 # Return the existing instance. 

108 return samples 

109 else: 

110 # Return a new instance. 

111 self = object.__new__(cls) 

112 self._cached_cvx_mat = None 

113 self._cached_cvx_vec = None 

114 self._cached_pic_mat = None 

115 self._cached_pic_vec = None 

116 self._original_shape = None 

117 return self 

118 

119 def __init__(self, samples, forced_original_shape=None, always_copy=True): 

120 """Load a number of data points (samples). 

121 

122 :param samples: 

123 Any of the following: 

124 

125 - A tuple or list of constants, each of which denotes a sample 

126 vector. Matrices are vectorized but their :attr:`original_shape` 

127 is stored and may be used by PICOS internally. 

128 - A constant row or column vector whose entries denote scalar 

129 samples. 

130 - A constant matrix whose columns denote the samples. 

131 - Another :class:`Samples` instance. If possible, it is returned as 

132 is (:class:`Samples` instances are immutable), otherwise a shallow 

133 copy with the necessary modifications is returned instead. 

134 

135 In any case, constants may be given as constant numeric data values 

136 (anything recognized by :func:`~.data.load_data`) or as constant 

137 PICOS expressions. 

138 

139 :param forced_original_shape: 

140 Overwrites :attr:`original_shape` with the given shape. 

141 

142 :param bool always_copy: 

143 If this is :obj:`False`, then data that is provided in the form of 

144 CVXOPT types is not copied but referenced if possible. This can 

145 speed up instance creation but will introduce inconsistencies if the 

146 original data is modified. Note that this argument has no impact if 

147 the ``samples`` argument already is a :class:`Samples` instance; in 

148 this case data is never copied. 

149 """ 

150 if isinstance(samples, Samples): 

151 # Handled in __new__. 

152 return 

153 elif isinstance(samples, (tuple, list)): 

154 if not samples: 

155 raise ValueError("Need at least one sample.") 

156 

157 if all(isinstance(s, (int, float, complex)) for s in samples): 

158 # Efficiently handle a list of scalars. 

159 self._cached_cvx_mat = load_data(samples)[0].T 

160 elif all(isinstance(s, ComplexAffineExpression) 

161 and s.constant for s in samples): 

162 if len(set(s.shape for s in samples)) != 1: 

163 raise ValueError("Cannot load samples of differing shapes.") 

164 

165 self._original_shape = samples[0].shape 

166 self._cached_pic_vec = tuple(s.vec for s in samples) 

167 else: 

168 samples = tuple( 

169 load_data(s, alwaysCopy=always_copy)[0] for s in samples) 

170 

171 if len(set(s.size for s in samples)) != 1: 

172 raise ValueError("Cannot load samples of differing shapes.") 

173 

174 self._original_shape = samples[0].size 

175 self._cached_cvx_vec = tuple( 

176 s if s.size[1] == 1 else s[:] for s in samples) 

177 elif isinstance(samples, Expression): 

178 samples = samples.refined 

179 

180 if not isinstance(samples, ComplexAffineExpression): 

181 raise TypeError("Can only extract samples from a (constant) " 

182 "affine expression, not from an instance of {}." 

183 .format(type(samples).__name__)) 

184 

185 if not samples.constant: 

186 raise TypeError("Can only extract samples from a constant " 

187 "expression, {} is not constant.".format(samples.string)) 

188 

189 self._cached_pic_mat = samples 

190 

191 # Treat any vector as a number of scalar samples. 

192 if self._cached_pic_mat.shape[1] == 1: 

193 self._cached_pic_mat = self._cached_pic_mat.T 

194 else: 

195 self._cached_cvx_mat = load_data(samples, alwaysCopy=always_copy)[0] 

196 

197 # Treat any vector as a number of scalar samples. 

198 if self._cached_cvx_mat.size[1] == 1: 

199 self._cached_cvx_mat = self._cached_cvx_mat.T 

200 

201 assert any(samples is not None for samples in ( 

202 self._cached_cvx_vec, 

203 self._cached_pic_vec, 

204 self._cached_cvx_mat, 

205 self._cached_pic_mat)) 

206 

207 if forced_original_shape is not None: 

208 forced_shape = load_shape(forced_original_shape) 

209 if forced_shape[0]*forced_shape[1] != self.dim: 

210 raise ValueError("Incompatible forced original shape.") 

211 self._original_shape = forced_shape 

212 

213 def __len__(self): 

214 """Number of samples.""" 

215 return self.num 

216 

217 def __str__(self): 

218 return glyphs.parenth( 

219 "{} {}-dimensional samples".format(self.num, self.dim)) 

220 

221 def __repr__(self): 

222 return glyphs.repr2("Samples", self.__str__()) 

223 

224 def __getitem__(self, i): 

225 return self.vectors[i] 

226 

227 def __iter__(self): 

228 for vector in self.vectors: 

229 yield vector 

230 

231 @property 

232 def _cvxopt_matrix(self): 

233 """A CVXOPT dense or sparse matrix whose columns are the samples. 

234 

235 This cached property is used by PICOS internally as accessing the CVXOPT 

236 value of a constant PICOS expression would create a copy of the data. 

237 

238 .. warning:: 

239 

240 :class:`Sample` instances are supposed to be immutable, so you are 

241 expected not to modify the returned CVXOPT objects. 

242 """ 

243 if self._cached_cvx_mat is not None: 

244 pass 

245 elif self._cached_pic_mat is not None: 

246 self._cached_cvx_mat = self._cached_pic_mat.value_as_matrix 

247 elif self._cached_cvx_vec is not None: 

248 self._cached_cvx_mat = cvxopt_hcat(self._cached_cvx_vec) 

249 else: 

250 self._cached_cvx_mat = cvxopt_hcat( 

251 [s.value_as_matrix for s in self._cached_pic_vec]) 

252 

253 return self._cached_cvx_mat 

254 

255 @property 

256 def _cvxopt_vectors(self): 

257 """A :class:`tuple` containing the samples as CVXOPT column vectors. 

258 

259 This cached property is used by PICOS internally as accessing the CVXOPT 

260 value of a constant PICOS expression would create a copy of the data. 

261 

262 .. warning:: 

263 

264 :class:`Sample` instances are supposed to be immutable, so you are 

265 expected not to modify the returned CVXOPT objects. 

266 """ 

267 if self._cached_cvx_vec is not None: 

268 pass 

269 elif self._cached_cvx_mat is not None: 

270 self._cached_cvx_vec = tuple(self._cached_cvx_mat[:, i] 

271 for i in range(self._cached_cvx_mat.size[1])) 

272 elif self._cached_pic_vec is not None: 

273 self._cached_cvx_vec = tuple( 

274 s.value_as_matrix for s in self._cached_pic_vec) 

275 else: 

276 # We need to convert from a PICOS to a CVXOPT matrix, do so in a way 

277 # that caches the result. 

278 _ = self._cvxopt_matrix 

279 assert self._cached_cvx_mat is not None 

280 

281 self._cached_cvx_vec = tuple(self._cached_cvx_mat[:, i] 

282 for i in range(self._cached_cvx_mat.size[1])) 

283 

284 return self._cached_cvx_vec 

285 

286 @property 

287 def matrix(self): 

288 """A matrix whose columns are the samples.""" 

289 if self._cached_pic_mat is not None: 

290 pass 

291 else: 

292 self._cached_pic_mat = Constant(self._cvxopt_matrix) 

293 

294 return self._cached_pic_mat 

295 

296 @property 

297 def vectors(self): 

298 """A :class:`tuple` containing the samples as column vectors.""" 

299 if self._cached_pic_vec is not None: 

300 pass 

301 else: 

302 self._cached_pic_vec = tuple( 

303 Constant(s) for s in self._cvxopt_vectors) 

304 

305 return self._cached_pic_vec 

306 

307 @cached_property 

308 def original(self): 

309 """A :class:`tuple` containing the samples in their original shape.""" 

310 shape = self.original_shape 

311 

312 if shape[1] == 1: 

313 return self.vectors 

314 else: 

315 return tuple(sample.reshaped(shape) for sample in self) 

316 

317 @property 

318 def dim(self): 

319 """Sample dimension.""" 

320 if self._cached_cvx_mat is not None: 

321 return self._cached_cvx_mat.size[0] 

322 elif self._cached_pic_mat is not None: 

323 return self._cached_pic_mat.shape[0] 

324 elif self._cached_cvx_vec is not None: 

325 # NOTE: len() counts nonzero entries for sparse matrices. 

326 return self._cached_cvx_vec[0].size[0] 

327 else: 

328 return len(self._cached_pic_vec[0]) 

329 

330 @property 

331 def num(self): 

332 """Number of samples.""" 

333 if self._cached_cvx_mat is not None: 

334 return self._cached_cvx_mat.size[1] 

335 elif self._cached_pic_mat is not None: 

336 return self._cached_pic_mat.shape[1] 

337 elif self._cached_cvx_vec is not None: 

338 return len(self._cached_cvx_vec) 

339 else: 

340 return len(self._cached_pic_vec) 

341 

342 @property 

343 def original_shape(self): 

344 """Original shape of the samples before vectorization.""" 

345 if self._original_shape is None: 

346 self._original_shape = (self.dim, 1) 

347 

348 return self._original_shape 

349 

350 @cached_property 

351 def mean(self): 

352 """The sample mean as a column vector.""" 

353 return Constant(sum(self._cvxopt_vectors) / self.num) 

354 

355 @cached_property 

356 def covariance(self): 

357 """The sample covariance matrix.""" 

358 if self.num == 1: 

359 return cvxopt.spmatrix([], [], [], (1, 1)) 

360 

361 mu = self.mean.value_as_matrix 

362 X = self._cvxopt_matrix 

363 Y = mu*cvxopt.matrix(1, (1, self.num)) 

364 Z = X - Y 

365 

366 return Constant(Z * Z.T / (self.num - 1)) 

367 

368 def partition(self, after_or_fraction=0.5): 

369 """Split the samples into two parts. 

370 

371 :param after_or_fraction: 

372 Either a fraction strictly between zero and one that denotes the 

373 relative size of the first partition or an integer that denotes the 

374 number of samples to put in the first partition. 

375 :type after_or_fraction: 

376 int or float 

377 """ 

378 if isinstance(after_or_fraction, float): 

379 if after_or_fraction <= 0 or after_or_fraction >= 1: 

380 raise ValueError( 

381 "A partitioning fraction must be strictly between 0 and 1.") 

382 

383 n = round(self.num * after_or_fraction) 

384 n = min(n, self.num - 1) 

385 n = max(1, n) 

386 else: 

387 n = int(after_or_fraction) 

388 

389 if n < 1 or n >= self.num: 

390 raise ValueError("Partitioning would leave one partition empty.") 

391 

392 A = self.__class__.__new__(self.__class__) 

393 B = self.__class__.__new__(self.__class__) 

394 

395 if self._cached_cvx_mat is not None: 

396 A._cached_cvx_mat = self._cached_cvx_mat[:, :n] 

397 B._cached_cvx_mat = self._cached_cvx_mat[:, n:] 

398 

399 if self._cached_cvx_vec is not None: 

400 A._cached_cvx_vec = self._cached_cvx_vec[:n] 

401 B._cached_cvx_vec = self._cached_cvx_vec[n:] 

402 

403 if self._cached_pic_mat is not None: 

404 A._cached_pic_mat = self._cached_pic_mat[:, :n] 

405 B._cached_pic_mat = self._cached_pic_mat[:, n:] 

406 

407 if self._cached_pic_vec is not None: 

408 A._cached_pic_vec = self._cached_pic_vec[:n] 

409 B._cached_pic_vec = self._cached_pic_vec[n:] 

410 

411 A._original_shape = self._original_shape 

412 B._original_shape = self._original_shape 

413 

414 return A, B 

415 

416 def select(self, indices): 

417 """Return a new :class:`Samples` instance with only selected samples. 

418 

419 :param indices: 

420 The indices of the samples to select. 

421 """ 

422 indices = list(indices) 

423 

424 S = self.__class__.__new__(self.__class__) 

425 

426 if self._cached_cvx_mat is not None: 

427 S._cached_cvx_mat = self._cached_cvx_mat[:, indices] 

428 

429 if self._cached_cvx_vec is not None: 

430 S._cached_cvx_vec = tuple(self._cached_cvx_vec[i] for i in indices) 

431 

432 if self._cached_pic_mat is not None: 

433 S._cached_pic_mat = self._cached_pic_mat[:, indices] 

434 

435 if self._cached_pic_vec is not None: 

436 S._cached_pic_vec = tuple(self._cached_pic_vec[i] for i in indices) 

437 

438 if len(S) == 0: 

439 raise ValueError("Empty susbet of samples selected.") 

440 

441 S._original_shape = self._original_shape 

442 return S 

443 

444 

445# -------------------------------------- 

446__all__ = api_end(_API_START, globals())