Coverage for picos/expressions/samples.py: 76.23%
244 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-04-12 07:53 +0000
« prev ^ index » next coverage.py v7.6.12, created at 2025-04-12 07:53 +0000
1# ------------------------------------------------------------------------------
2# Copyright (C) 2020 Maximilian Stahlberg
3#
4# This file is part of PICOS.
5#
6# PICOS is free software: you can redistribute it and/or modify it under the
7# terms of the GNU General Public License as published by the Free Software
8# Foundation, either version 3 of the License, or (at your option) any later
9# version.
10#
11# PICOS is distributed in the hope that it will be useful, but WITHOUT ANY
12# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
13# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License along with
16# this program. If not, see <http://www.gnu.org/licenses/>.
17# ------------------------------------------------------------------------------
19"""Implements :class:`Samples`."""
21import random
23import cvxopt
25from .. import glyphs
26from ..apidoc import api_end, api_start
27from ..caching import cached_property
28from .data import cvxopt_hcat, load_data, load_shape
29from .exp_affine import ComplexAffineExpression, Constant
30from .expression import Expression
31from ..legacy import throw_deprecation_warning
33_API_START = api_start(globals())
34# -------------------------------
37class Samples():
38 """A collection of data points.
40 :Example:
42 >>> from picos.expressions import Samples
43 >>> # Load the column-major vectorization of six matrices.
44 >>> data = [[[1*i, 3*i],
45 ... [2*i, 4*i]] for i in range(1, 7)]
46 >>> S = Samples(data)
47 >>> S
48 <Samples: (6 4-dimensional samples)>
49 >>> [S.num, S.dim, S.original_shape] # Metadata.
50 [6, 4, (2, 2)]
51 >>> S.matrix # All samples as the columns of one matrix.
52 <4×6 Real Constant: [4×6]>
53 >>> print(S.matrix)
54 [ 1.00e+00 2.00e+00 3.00e+00 4.00e+00 5.00e+00 6.00e+00]
55 [ 2.00e+00 4.00e+00 6.00e+00 8.00e+00 1.00e+01 1.20e+01]
56 [ 3.00e+00 6.00e+00 9.00e+00 1.20e+01 1.50e+01 1.80e+01]
57 [ 4.00e+00 8.00e+00 1.20e+01 1.60e+01 2.00e+01 2.40e+01]
58 >>> print(S[0].T) # The first sample (transposed for brevity).
59 [ 1.00e+00 2.00e+00 3.00e+00 4.00e+00]
60 >>> print(S.mean.T) # The sample mean (transposed for brevity).
61 [ 3.50e+00 7.00e+00 1.05e+01 1.40e+01]
62 >>> print(S.covariance) # The sample covariance matrix.
63 [ 3.50e+00 7.00e+00 1.05e+01 1.40e+01]
64 [ 7.00e+00 1.40e+01 2.10e+01 2.80e+01]
65 [ 1.05e+01 2.10e+01 3.15e+01 4.20e+01]
66 [ 1.40e+01 2.80e+01 4.20e+01 5.60e+01]
67 >>> print(S.original[0]) # The first sample in its original shape.
68 [ 1.00e+00 3.00e+00]
69 [ 2.00e+00 4.00e+00]
70 >>> U = S.select([0, 2, 4]) # Select a subset of samples by indices.
71 >>> print(U.matrix)
72 [ 1.00e+00 3.00e+00 5.00e+00]
73 [ 2.00e+00 6.00e+00 1.00e+01]
74 [ 3.00e+00 9.00e+00 1.50e+01]
75 [ 4.00e+00 1.20e+01 2.00e+01]
76 >>> T, V = S.partition() # Split into training and validation samples.
77 >>> print(T.matrix)
78 [ 1.00e+00 2.00e+00 3.00e+00]
79 [ 2.00e+00 4.00e+00 6.00e+00]
80 [ 3.00e+00 6.00e+00 9.00e+00]
81 [ 4.00e+00 8.00e+00 1.20e+01]
82 >>> print(V.matrix)
83 [ 4.00e+00 5.00e+00 6.00e+00]
84 [ 8.00e+00 1.00e+01 1.20e+01]
85 [ 1.20e+01 1.50e+01 1.80e+01]
86 [ 1.60e+01 2.00e+01 2.40e+01]
87 """
89 def __new__(cls, samples=None, forced_original_shape=None, **kwargs):
90 """Prepare a :class:`Samples` instance."""
91 if isinstance(samples, cls):
92 if forced_original_shape is not None:
93 forced_shape = load_shape(forced_original_shape)
94 if forced_shape[0]*forced_shape[1] != samples.dim:
95 raise ValueError("Incompatible forced original shape.")
97 if forced_shape == samples.original_shape:
98 # Shapes are consistent, return the existing instance.
99 return samples
100 else:
101 # Make a shallow copy and change only the original shape.
102 self = object.__new__(cls)
103 self._cached_cvx_mat = samples._cached_cvx_mat
104 self._cached_cvx_vec = samples._cached_cvx_vec
105 self._cached_pic_mat = samples._cached_pic_mat
106 self._cached_pic_vec = samples._cached_pic_vec
107 self._original_shape = forced_shape
108 return self
109 else:
110 # Return the existing instance.
111 return samples
112 else:
113 # Return a new instance.
114 self = object.__new__(cls)
115 self._cached_cvx_mat = None
116 self._cached_cvx_vec = None
117 self._cached_pic_mat = None
118 self._cached_pic_vec = None
119 self._original_shape = None
120 return self
122 def __init__(self, samples, forced_original_shape=None, always_copy=True):
123 """Load a number of data points (samples).
125 :param samples:
126 Any of the following:
128 - A tuple or list of constants, each of which denotes a sample
129 vector. Matrices are vectorized but their :attr:`original_shape`
130 is stored and may be used by PICOS internally.
131 - A constant row or column vector whose entries denote scalar
132 samples.
133 - A constant matrix whose columns denote the samples.
134 - Another :class:`Samples` instance. If possible, it is returned as
135 is (:class:`Samples` instances are immutable), otherwise a shallow
136 copy with the necessary modifications is returned instead.
138 In any case, constants may be given as constant numeric data values
139 (anything recognized by :func:`~.data.load_data`) or as constant
140 PICOS expressions.
142 :param forced_original_shape:
143 Overwrites :attr:`original_shape` with the given shape.
145 :param bool always_copy:
146 If this is :obj:`False`, then data that is provided in the form of
147 CVXOPT types is not copied but referenced if possible. This can
148 speed up instance creation but will introduce inconsistencies if the
149 original data is modified. Note that this argument has no impact if
150 the ``samples`` argument already is a :class:`Samples` instance; in
151 this case data is never copied.
152 """
153 if isinstance(samples, Samples):
154 # Handled in __new__.
155 return
156 elif isinstance(samples, (tuple, list)):
157 if not samples:
158 raise ValueError("Need at least one sample.")
160 if all(isinstance(s, (int, float, complex)) for s in samples):
161 # Efficiently handle a list of scalars.
162 self._cached_cvx_mat = load_data(samples)[0].T
163 elif all(isinstance(s, ComplexAffineExpression)
164 and s.constant for s in samples):
165 if len(set(s.shape for s in samples)) != 1:
166 raise ValueError("Cannot load samples of differing shapes.")
168 self._original_shape = samples[0].shape
169 self._cached_pic_vec = tuple(s.vec for s in samples)
170 else:
171 samples = tuple(
172 load_data(s, alwaysCopy=always_copy)[0] for s in samples)
174 if len(set(s.size for s in samples)) != 1:
175 raise ValueError("Cannot load samples of differing shapes.")
177 self._original_shape = samples[0].size
178 self._cached_cvx_vec = tuple(
179 s if s.size[1] == 1 else s[:] for s in samples)
180 elif isinstance(samples, Expression):
181 samples = samples.refined
183 if not isinstance(samples, ComplexAffineExpression):
184 raise TypeError("Can only extract samples from a (constant) "
185 "affine expression, not from an instance of {}."
186 .format(type(samples).__name__))
188 if not samples.constant:
189 raise TypeError("Can only extract samples from a constant "
190 "expression, {} is not constant.".format(samples.string))
192 self._cached_pic_mat = samples
194 # Treat any vector as a number of scalar samples.
195 if self._cached_pic_mat.shape[1] == 1:
196 self._cached_pic_mat = self._cached_pic_mat.T
197 else:
198 self._cached_cvx_mat = load_data(samples, alwaysCopy=always_copy)[0]
200 # Treat any vector as a number of scalar samples.
201 if self._cached_cvx_mat.size[1] == 1:
202 self._cached_cvx_mat = self._cached_cvx_mat.T
204 assert any(samples is not None for samples in (
205 self._cached_cvx_vec,
206 self._cached_pic_vec,
207 self._cached_cvx_mat,
208 self._cached_pic_mat))
210 if forced_original_shape is not None:
211 forced_shape = load_shape(forced_original_shape)
212 if forced_shape[0]*forced_shape[1] != self.dim:
213 raise ValueError("Incompatible forced original shape.")
214 self._original_shape = forced_shape
216 def __len__(self):
217 """Number of samples."""
218 return self.num
220 def __str__(self):
221 return glyphs.parenth(
222 "{} {}-dimensional samples".format(self.num, self.dim))
224 def __repr__(self):
225 return glyphs.repr2("Samples", self.__str__())
227 def __getitem__(self, i):
228 return self.vectors[i]
230 def __iter__(self):
231 for vector in self.vectors:
232 yield vector
234 @property
235 def _cvxopt_matrix(self):
236 """A CVXOPT dense or sparse matrix whose columns are the samples.
238 This cached property is used by PICOS internally as accessing the CVXOPT
239 value of a constant PICOS expression would create a copy of the data.
241 .. warning::
243 :class:`Sample` instances are supposed to be immutable, so you are
244 expected not to modify the returned CVXOPT objects.
245 """
246 if self._cached_cvx_mat is not None:
247 pass
248 elif self._cached_pic_mat is not None:
249 self._cached_cvx_mat = self._cached_pic_mat.value_as_matrix
250 elif self._cached_cvx_vec is not None:
251 self._cached_cvx_mat = cvxopt_hcat(self._cached_cvx_vec)
252 else:
253 self._cached_cvx_mat = cvxopt_hcat(
254 [s.value_as_matrix for s in self._cached_pic_vec])
256 return self._cached_cvx_mat
258 @property
259 def _cvxopt_vectors(self):
260 """A :class:`tuple` containing the samples as CVXOPT column vectors.
262 This cached property is used by PICOS internally as accessing the CVXOPT
263 value of a constant PICOS expression would create a copy of the data.
265 .. warning::
267 :class:`Sample` instances are supposed to be immutable, so you are
268 expected not to modify the returned CVXOPT objects.
269 """
270 if self._cached_cvx_vec is not None:
271 pass
272 elif self._cached_cvx_mat is not None:
273 self._cached_cvx_vec = tuple(self._cached_cvx_mat[:, i]
274 for i in range(self._cached_cvx_mat.size[1]))
275 elif self._cached_pic_vec is not None:
276 self._cached_cvx_vec = tuple(
277 s.value_as_matrix for s in self._cached_pic_vec)
278 else:
279 # We need to convert from a PICOS to a CVXOPT matrix, do so in a way
280 # that caches the result.
281 _ = self._cvxopt_matrix
282 assert self._cached_cvx_mat is not None
284 self._cached_cvx_vec = tuple(self._cached_cvx_mat[:, i]
285 for i in range(self._cached_cvx_mat.size[1]))
287 return self._cached_cvx_vec
289 @property
290 def matrix(self):
291 """A matrix whose columns are the samples."""
292 if self._cached_pic_mat is not None:
293 pass
294 else:
295 self._cached_pic_mat = Constant(self._cvxopt_matrix)
297 return self._cached_pic_mat
299 @property
300 def vectors(self):
301 """A :class:`tuple` containing the samples as column vectors."""
302 if self._cached_pic_vec is not None:
303 pass
304 else:
305 self._cached_pic_vec = tuple(
306 Constant(s) for s in self._cvxopt_vectors)
308 return self._cached_pic_vec
310 @cached_property
311 def original(self):
312 """A :class:`tuple` containing the samples in their original shape."""
313 shape = self.original_shape
315 if shape[1] == 1:
316 return self.vectors
317 else:
318 return tuple(sample.reshaped(shape) for sample in self)
320 @property
321 def dim(self):
322 """Sample dimension."""
323 if self._cached_cvx_mat is not None:
324 return self._cached_cvx_mat.size[0]
325 elif self._cached_pic_mat is not None:
326 return self._cached_pic_mat.shape[0]
327 elif self._cached_cvx_vec is not None:
328 # NOTE: len() counts nonzero entries for sparse matrices.
329 return self._cached_cvx_vec[0].size[0]
330 else:
331 return len(self._cached_pic_vec[0])
333 @property
334 def num(self):
335 """Number of samples."""
336 if self._cached_cvx_mat is not None:
337 return self._cached_cvx_mat.size[1]
338 elif self._cached_pic_mat is not None:
339 return self._cached_pic_mat.shape[1]
340 elif self._cached_cvx_vec is not None:
341 return len(self._cached_cvx_vec)
342 else:
343 return len(self._cached_pic_vec)
345 @property
346 def original_shape(self):
347 """Original shape of the samples before vectorization."""
348 if self._original_shape is None:
349 self._original_shape = (self.dim, 1)
351 return self._original_shape
353 @cached_property
354 def mean(self):
355 """The sample mean as a column vector."""
356 return Constant(sum(self._cvxopt_vectors) / self.num)
358 @cached_property
359 def covariance(self):
360 """The sample covariance matrix."""
361 if self.num == 1:
362 return cvxopt.spmatrix([], [], [], (1, 1))
364 mu = self.mean.value_as_matrix
365 X = self._cvxopt_matrix
366 Y = mu*cvxopt.matrix(1, (1, self.num))
367 Z = X - Y
369 return Constant(Z * Z.T / (self.num - 1))
371 def shuffled(self, rng=None):
372 """Return a randomly shuffled instance of the samples.
374 :param rng: DEPRECATED
376 :Example:
378 >>> from picos.expressions import Samples
379 >>> S = Samples(range(6))
380 >>> print(S.matrix)
381 [ 0.00e+00 1.00e+00 2.00e+00 3.00e+00 4.00e+00 5.00e+00]
382 >>> import random
383 >>> random.seed(42) # Fix seed for reproducibility.
384 >>> print(S.shuffled().matrix)
385 [ 3.00e+00 1.00e+00 2.00e+00 4.00e+00 0.00e+00 5.00e+00]
386 """
387 # Handle deprecated 'rng' parameter.
388 if rng is not None:
389 throw_deprecation_warning(
390 "Argument 'rng' is deprecated and ignored.")
392 order = list(range(self.num))
393 random.shuffle(order)
395 S = self.__class__.__new__(self.__class__)
397 if self._cached_cvx_mat is not None:
398 S._cached_cvx_mat = self._cached_cvx_mat[:, order]
400 if self._cached_cvx_vec is not None:
401 S._cached_cvx_vec = tuple(self._cached_cvx_vec[i] for i in order)
403 if self._cached_pic_mat is not None:
404 # NOTE: Rename to a default string for consistency.
405 S._cached_pic_mat = self._cached_pic_mat[:, order].renamed(
406 glyphs.matrix(glyphs.shape(self._cached_pic_mat.shape)))
408 if self._cached_pic_vec is not None:
409 S._cached_pic_vec = tuple(self._cached_pic_vec[i] for i in order)
411 return S
413 def partition(self, after_or_fraction=0.5):
414 """Split the samples into two parts.
416 :param after_or_fraction:
417 Either a fraction strictly between zero and one that denotes the
418 relative size of the first partition or an integer that denotes the
419 number of samples to put in the first partition.
420 :type after_or_fraction:
421 int or float
422 """
423 if isinstance(after_or_fraction, float):
424 if after_or_fraction <= 0 or after_or_fraction >= 1:
425 raise ValueError(
426 "A partitioning fraction must be strictly between 0 and 1.")
428 n = round(self.num * after_or_fraction)
429 n = min(n, self.num - 1)
430 n = max(1, n)
431 else:
432 n = int(after_or_fraction)
434 if n < 1 or n >= self.num:
435 raise ValueError("Partitioning would leave one partition empty.")
437 A = self.__class__.__new__(self.__class__)
438 B = self.__class__.__new__(self.__class__)
440 if self._cached_cvx_mat is not None:
441 A._cached_cvx_mat = self._cached_cvx_mat[:, :n]
442 B._cached_cvx_mat = self._cached_cvx_mat[:, n:]
444 if self._cached_cvx_vec is not None:
445 A._cached_cvx_vec = self._cached_cvx_vec[:n]
446 B._cached_cvx_vec = self._cached_cvx_vec[n:]
448 if self._cached_pic_mat is not None:
449 A._cached_pic_mat = self._cached_pic_mat[:, :n]
450 B._cached_pic_mat = self._cached_pic_mat[:, n:]
452 if self._cached_pic_vec is not None:
453 A._cached_pic_vec = self._cached_pic_vec[:n]
454 B._cached_pic_vec = self._cached_pic_vec[n:]
456 A._original_shape = self._original_shape
457 B._original_shape = self._original_shape
459 return A, B
461 def kfold(self, k):
462 r"""Perform :math:`k`-fold cross-validation (without shuffling).
464 If random shuffling is desired, write ``S.shuffled().kfold(k)`` where
465 ``S`` is your :class:`Samples` instance. To make the shuffling
466 reproducible, see :meth:`shuffled`.
468 :returns list(tuple):
469 A list of :math:`k` training set and validation set pairs.
471 .. warning::
473 If the number of samples :math:`n` is not a multiple of :math:`k`,
474 then the last :math:`n \bmod k` samples will appear in every
475 training but in no validation set.
477 :Example:
479 >>> from picos.expressions import Samples
480 >>> n, k = 7, 3
481 >>> S = Samples(range(n))
482 >>> for i, (T, V) in enumerate(S.kfold(k)):
483 ... print("Partition {}:\nT = {}V = {}"
484 ... .format(i + 1, T.matrix, V.matrix))
485 Partition 1:
486 T = [ 2.00e+00 3.00e+00 4.00e+00 5.00e+00 6.00e+00]
487 V = [ 0.00e+00 1.00e+00]
488 <BLANKLINE>
489 Partition 2:
490 T = [ 0.00e+00 1.00e+00 4.00e+00 5.00e+00 6.00e+00]
491 V = [ 2.00e+00 3.00e+00]
492 <BLANKLINE>
493 Partition 3:
494 T = [ 0.00e+00 1.00e+00 2.00e+00 3.00e+00 6.00e+00]
495 V = [ 4.00e+00 5.00e+00]
496 <BLANKLINE>
498 """
499 if not isinstance(k, int):
500 raise TypeError("k must be an integer.")
502 if k < 2:
503 raise ValueError("k must be at least two.")
505 if k > self.num:
506 raise ValueError("k must not exceed the number of samples.")
508 n = self.num // k
510 assert n >= 1 and n < self.num
512 fold = []
513 indices = list(range(self.num))
515 for i in range(k):
516 t = indices[:i*n] + indices[(i+1)*n:]
517 v = indices[i*n:(i+1)*n]
519 T = self.__class__.__new__(self.__class__)
520 V = self.__class__.__new__(self.__class__)
522 if self._cached_cvx_mat is not None:
523 T._cached_cvx_mat = self._cached_cvx_mat[:, t]
524 V._cached_cvx_mat = self._cached_cvx_mat[:, v]
526 if self._cached_cvx_vec is not None:
527 T._cached_cvx_vec = tuple(self._cached_cvx_vec[i] for i in t)
528 V._cached_cvx_vec = tuple(self._cached_cvx_vec[i] for i in v)
530 if self._cached_pic_mat is not None:
531 T._cached_pic_mat = self._cached_pic_mat[:, t]
532 V._cached_pic_mat = self._cached_pic_mat[:, v]
534 if self._cached_pic_vec is not None:
535 T._cached_pic_vec = tuple(self._cached_pic_vec[i] for i in t)
536 V._cached_pic_vec = tuple(self._cached_pic_vec[i] for i in v)
538 fold.append((T, V))
540 return fold
542 def select(self, indices):
543 """Return a new :class:`Samples` instance with only selected samples.
545 :param indices:
546 The indices of the samples to select.
547 """
548 indices = list(indices)
550 S = self.__class__.__new__(self.__class__)
552 if self._cached_cvx_mat is not None:
553 S._cached_cvx_mat = self._cached_cvx_mat[:, indices]
555 if self._cached_cvx_vec is not None:
556 S._cached_cvx_vec = tuple(self._cached_cvx_vec[i] for i in indices)
558 if self._cached_pic_mat is not None:
559 S._cached_pic_mat = self._cached_pic_mat[:, indices]
561 if self._cached_pic_vec is not None:
562 S._cached_pic_vec = tuple(self._cached_pic_vec[i] for i in indices)
564 if len(S) == 0:
565 raise ValueError("Empty susbet of samples selected.")
567 S._original_shape = self._original_shape
568 return S
571# --------------------------------------
572__all__ = api_end(_API_START, globals())