Coverage for picos/expressions/samples.py: 76.45%
242 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-03-26 07:46 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2023-03-26 07:46 +0000
1# ------------------------------------------------------------------------------
2# Copyright (C) 2020 Maximilian Stahlberg
3#
4# This file is part of PICOS.
5#
6# PICOS is free software: you can redistribute it and/or modify it under the
7# terms of the GNU General Public License as published by the Free Software
8# Foundation, either version 3 of the License, or (at your option) any later
9# version.
10#
11# PICOS is distributed in the hope that it will be useful, but WITHOUT ANY
12# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
13# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License along with
16# this program. If not, see <http://www.gnu.org/licenses/>.
17# ------------------------------------------------------------------------------
19"""Implements :class:`Samples`."""
21import random
23import cvxopt
25from .. import glyphs
26from ..apidoc import api_end, api_start
27from ..caching import cached_property
28from .data import cvxopt_hcat, load_data, load_shape
29from .exp_affine import ComplexAffineExpression, Constant
30from .expression import Expression
32_API_START = api_start(globals())
33# -------------------------------
36class Samples():
37 """A collection of data points.
39 :Example:
41 >>> from picos.expressions import Samples
42 >>> # Load the column-major vectorization of six matrices.
43 >>> data = [[[1*i, 3*i],
44 ... [2*i, 4*i]] for i in range(1, 7)]
45 >>> S = Samples(data)
46 >>> S
47 <Samples: (6 4-dimensional samples)>
48 >>> [S.num, S.dim, S.original_shape] # Metadata.
49 [6, 4, (2, 2)]
50 >>> S.matrix # All samples as the columns of one matrix.
51 <4×6 Real Constant: [4×6]>
52 >>> print(S.matrix)
53 [ 1.00e+00 2.00e+00 3.00e+00 4.00e+00 5.00e+00 6.00e+00]
54 [ 2.00e+00 4.00e+00 6.00e+00 8.00e+00 1.00e+01 1.20e+01]
55 [ 3.00e+00 6.00e+00 9.00e+00 1.20e+01 1.50e+01 1.80e+01]
56 [ 4.00e+00 8.00e+00 1.20e+01 1.60e+01 2.00e+01 2.40e+01]
57 >>> print(S[0].T) # The first sample (transposed for brevity).
58 [ 1.00e+00 2.00e+00 3.00e+00 4.00e+00]
59 >>> print(S.mean.T) # The sample mean (transposed for brevity).
60 [ 3.50e+00 7.00e+00 1.05e+01 1.40e+01]
61 >>> print(S.covariance) # The sample covariance matrix.
62 [ 3.50e+00 7.00e+00 1.05e+01 1.40e+01]
63 [ 7.00e+00 1.40e+01 2.10e+01 2.80e+01]
64 [ 1.05e+01 2.10e+01 3.15e+01 4.20e+01]
65 [ 1.40e+01 2.80e+01 4.20e+01 5.60e+01]
66 >>> print(S.original[0]) # The first sample in its original shape.
67 [ 1.00e+00 3.00e+00]
68 [ 2.00e+00 4.00e+00]
69 >>> U = S.select([0, 2, 4]) # Select a subset of samples by indices.
70 >>> print(U.matrix)
71 [ 1.00e+00 3.00e+00 5.00e+00]
72 [ 2.00e+00 6.00e+00 1.00e+01]
73 [ 3.00e+00 9.00e+00 1.50e+01]
74 [ 4.00e+00 1.20e+01 2.00e+01]
75 >>> T, V = S.partition() # Split into training and validation samples.
76 >>> print(T.matrix)
77 [ 1.00e+00 2.00e+00 3.00e+00]
78 [ 2.00e+00 4.00e+00 6.00e+00]
79 [ 3.00e+00 6.00e+00 9.00e+00]
80 [ 4.00e+00 8.00e+00 1.20e+01]
81 >>> print(V.matrix)
82 [ 4.00e+00 5.00e+00 6.00e+00]
83 [ 8.00e+00 1.00e+01 1.20e+01]
84 [ 1.20e+01 1.50e+01 1.80e+01]
85 [ 1.60e+01 2.00e+01 2.40e+01]
86 """
88 def __new__(cls, samples=None, forced_original_shape=None, **kwargs):
89 """Prepare a :class:`Samples` instance."""
90 if isinstance(samples, cls):
91 if forced_original_shape is not None:
92 forced_shape = load_shape(forced_original_shape)
93 if forced_shape[0]*forced_shape[1] != samples.dim:
94 raise ValueError("Incompatible forced original shape.")
96 if forced_shape == samples.original_shape:
97 # Shapes are consistent, return the existing instance.
98 return samples
99 else:
100 # Make a shallow copy and change only the original shape.
101 self = object.__new__(cls)
102 self._cached_cvx_mat = samples._cached_cvx_mat
103 self._cached_cvx_vec = samples._cached_cvx_vec
104 self._cached_pic_mat = samples._cached_pic_mat
105 self._cached_pic_vec = samples._cached_pic_vec
106 self._original_shape = forced_shape
107 return self
108 else:
109 # Return the existing instance.
110 return samples
111 else:
112 # Return a new instance.
113 self = object.__new__(cls)
114 self._cached_cvx_mat = None
115 self._cached_cvx_vec = None
116 self._cached_pic_mat = None
117 self._cached_pic_vec = None
118 self._original_shape = None
119 return self
121 def __init__(self, samples, forced_original_shape=None, always_copy=True):
122 """Load a number of data points (samples).
124 :param samples:
125 Any of the following:
127 - A tuple or list of constants, each of which denotes a sample
128 vector. Matrices are vectorized but their :attr:`original_shape`
129 is stored and may be used by PICOS internally.
130 - A constant row or column vector whose entries denote scalar
131 samples.
132 - A constant matrix whose columns denote the samples.
133 - Another :class:`Samples` instance. If possible, it is returned as
134 is (:class:`Samples` instances are immutable), otherwise a shallow
135 copy with the necessary modifications is returned instead.
137 In any case, constants may be given as constant numeric data values
138 (anything recognized by :func:`~.data.load_data`) or as constant
139 PICOS expressions.
141 :param forced_original_shape:
142 Overwrites :attr:`original_shape` with the given shape.
144 :param bool always_copy:
145 If this is :obj:`False`, then data that is provided in the form of
146 CVXOPT types is not copied but referenced if possible. This can
147 speed up instance creation but will introduce inconsistencies if the
148 original data is modified. Note that this argument has no impact if
149 the ``samples`` argument already is a :class:`Samples` instance; in
150 this case data is never copied.
151 """
152 if isinstance(samples, Samples):
153 # Handled in __new__.
154 return
155 elif isinstance(samples, (tuple, list)):
156 if not samples:
157 raise ValueError("Need at least one sample.")
159 if all(isinstance(s, (int, float, complex)) for s in samples):
160 # Efficiently handle a list of scalars.
161 self._cached_cvx_mat = load_data(samples)[0].T
162 elif all(isinstance(s, ComplexAffineExpression)
163 and s.constant for s in samples):
164 if len(set(s.shape for s in samples)) != 1:
165 raise ValueError("Cannot load samples of differing shapes.")
167 self._original_shape = samples[0].shape
168 self._cached_pic_vec = tuple(s.vec for s in samples)
169 else:
170 samples = tuple(
171 load_data(s, alwaysCopy=always_copy)[0] for s in samples)
173 if len(set(s.size for s in samples)) != 1:
174 raise ValueError("Cannot load samples of differing shapes.")
176 self._original_shape = samples[0].size
177 self._cached_cvx_vec = tuple(
178 s if s.size[1] == 1 else s[:] for s in samples)
179 elif isinstance(samples, Expression):
180 samples = samples.refined
182 if not isinstance(samples, ComplexAffineExpression):
183 raise TypeError("Can only extract samples from a (constant) "
184 "affine expression, not from an instance of {}."
185 .format(type(samples).__name__))
187 if not samples.constant:
188 raise TypeError("Can only extract samples from a constant "
189 "expression, {} is not constant.".format(samples.string))
191 self._cached_pic_mat = samples
193 # Treat any vector as a number of scalar samples.
194 if self._cached_pic_mat.shape[1] == 1:
195 self._cached_pic_mat = self._cached_pic_mat.T
196 else:
197 self._cached_cvx_mat = load_data(samples, alwaysCopy=always_copy)[0]
199 # Treat any vector as a number of scalar samples.
200 if self._cached_cvx_mat.size[1] == 1:
201 self._cached_cvx_mat = self._cached_cvx_mat.T
203 assert any(samples is not None for samples in (
204 self._cached_cvx_vec,
205 self._cached_pic_vec,
206 self._cached_cvx_mat,
207 self._cached_pic_mat))
209 if forced_original_shape is not None:
210 forced_shape = load_shape(forced_original_shape)
211 if forced_shape[0]*forced_shape[1] != self.dim:
212 raise ValueError("Incompatible forced original shape.")
213 self._original_shape = forced_shape
215 def __len__(self):
216 """Number of samples."""
217 return self.num
219 def __str__(self):
220 return glyphs.parenth(
221 "{} {}-dimensional samples".format(self.num, self.dim))
223 def __repr__(self):
224 return glyphs.repr2("Samples", self.__str__())
226 def __getitem__(self, i):
227 return self.vectors[i]
229 def __iter__(self):
230 for vector in self.vectors:
231 yield vector
233 @property
234 def _cvxopt_matrix(self):
235 """A CVXOPT dense or sparse matrix whose columns are the samples.
237 This cached property is used by PICOS internally as accessing the CVXOPT
238 value of a constant PICOS expression would create a copy of the data.
240 .. warning::
242 :class:`Sample` instances are supposed to be immutable, so you are
243 expected not to modify the returned CVXOPT objects.
244 """
245 if self._cached_cvx_mat is not None:
246 pass
247 elif self._cached_pic_mat is not None:
248 self._cached_cvx_mat = self._cached_pic_mat.value_as_matrix
249 elif self._cached_cvx_vec is not None:
250 self._cached_cvx_mat = cvxopt_hcat(self._cached_cvx_vec)
251 else:
252 self._cached_cvx_mat = cvxopt_hcat(
253 [s.value_as_matrix for s in self._cached_pic_vec])
255 return self._cached_cvx_mat
257 @property
258 def _cvxopt_vectors(self):
259 """A :class:`tuple` containing the samples as CVXOPT column vectors.
261 This cached property is used by PICOS internally as accessing the CVXOPT
262 value of a constant PICOS expression would create a copy of the data.
264 .. warning::
266 :class:`Sample` instances are supposed to be immutable, so you are
267 expected not to modify the returned CVXOPT objects.
268 """
269 if self._cached_cvx_vec is not None:
270 pass
271 elif self._cached_cvx_mat is not None:
272 self._cached_cvx_vec = tuple(self._cached_cvx_mat[:, i]
273 for i in range(self._cached_cvx_mat.size[1]))
274 elif self._cached_pic_vec is not None:
275 self._cached_cvx_vec = tuple(
276 s.value_as_matrix for s in self._cached_pic_vec)
277 else:
278 # We need to convert from a PICOS to a CVXOPT matrix, do so in a way
279 # that caches the result.
280 _ = self._cvxopt_matrix
281 assert self._cached_cvx_mat is not None
283 self._cached_cvx_vec = tuple(self._cached_cvx_mat[:, i]
284 for i in range(self._cached_cvx_mat.size[1]))
286 return self._cached_cvx_vec
288 @property
289 def matrix(self):
290 """A matrix whose columns are the samples."""
291 if self._cached_pic_mat is not None:
292 pass
293 else:
294 self._cached_pic_mat = Constant(self._cvxopt_matrix)
296 return self._cached_pic_mat
298 @property
299 def vectors(self):
300 """A :class:`tuple` containing the samples as column vectors."""
301 if self._cached_pic_vec is not None:
302 pass
303 else:
304 self._cached_pic_vec = tuple(
305 Constant(s) for s in self._cvxopt_vectors)
307 return self._cached_pic_vec
309 @cached_property
310 def original(self):
311 """A :class:`tuple` containing the samples in their original shape."""
312 shape = self.original_shape
314 if shape[1] == 1:
315 return self.vectors
316 else:
317 return tuple(sample.reshaped(shape) for sample in self)
319 @property
320 def dim(self):
321 """Sample dimension."""
322 if self._cached_cvx_mat is not None:
323 return self._cached_cvx_mat.size[0]
324 elif self._cached_pic_mat is not None:
325 return self._cached_pic_mat.shape[0]
326 elif self._cached_cvx_vec is not None:
327 # NOTE: len() counts nonzero entries for sparse matrices.
328 return self._cached_cvx_vec[0].size[0]
329 else:
330 return len(self._cached_pic_vec[0])
332 @property
333 def num(self):
334 """Number of samples."""
335 if self._cached_cvx_mat is not None:
336 return self._cached_cvx_mat.size[1]
337 elif self._cached_pic_mat is not None:
338 return self._cached_pic_mat.shape[1]
339 elif self._cached_cvx_vec is not None:
340 return len(self._cached_cvx_vec)
341 else:
342 return len(self._cached_pic_vec)
344 @property
345 def original_shape(self):
346 """Original shape of the samples before vectorization."""
347 if self._original_shape is None:
348 self._original_shape = (self.dim, 1)
350 return self._original_shape
352 @cached_property
353 def mean(self):
354 """The sample mean as a column vector."""
355 return Constant(sum(self._cvxopt_vectors) / self.num)
357 @cached_property
358 def covariance(self):
359 """The sample covariance matrix."""
360 if self.num == 1:
361 return cvxopt.spmatrix([], [], [], (1, 1))
363 mu = self.mean.value_as_matrix
364 X = self._cvxopt_matrix
365 Y = mu*cvxopt.matrix(1, (1, self.num))
366 Z = X - Y
368 return Constant(Z * Z.T / (self.num - 1))
370 def shuffled(self, rng=None):
371 """Return a randomly shuffled instance of the samples.
373 :param rng:
374 A function that generates a random :class:`float` in :math:`[0, 1)`.
375 Defaults to whatever :func:`random.shuffle` defaults to.
377 :Example:
379 >>> from picos.expressions import Samples
380 >>> S = Samples(range(6))
381 >>> print(S.matrix)
382 [ 0.00e+00 1.00e+00 2.00e+00 3.00e+00 4.00e+00 5.00e+00]
383 >>> rng = lambda: 0.5 # Fake RNG for reproducibility.
384 >>> print(S.shuffled(rng).matrix)
385 [ 0.00e+00 5.00e+00 1.00e+00 4.00e+00 2.00e+00 3.00e+00]
386 """
387 order = list(range(self.num))
388 random.shuffle(order, rng)
390 S = self.__class__.__new__(self.__class__)
392 if self._cached_cvx_mat is not None:
393 S._cached_cvx_mat = self._cached_cvx_mat[:, order]
395 if self._cached_cvx_vec is not None:
396 S._cached_cvx_vec = tuple(self._cached_cvx_vec[i] for i in order)
398 if self._cached_pic_mat is not None:
399 # NOTE: Rename to a default string for consistency.
400 S._cached_pic_mat = self._cached_pic_mat[:, order].renamed(
401 glyphs.matrix(glyphs.shape(self._cached_pic_mat.shape)))
403 if self._cached_pic_vec is not None:
404 S._cached_pic_vec = tuple(self._cached_pic_vec[i] for i in order)
406 return S
408 def partition(self, after_or_fraction=0.5):
409 """Split the samples into two parts.
411 :param after_or_fraction:
412 Either a fraction strictly between zero and one that denotes the
413 relative size of the first partition or an integer that denotes the
414 number of samples to put in the first partition.
415 :type after_or_fraction:
416 int or float
417 """
418 if isinstance(after_or_fraction, float):
419 if after_or_fraction <= 0 or after_or_fraction >= 1:
420 raise ValueError(
421 "A partitioning fraction must be strictly between 0 and 1.")
423 n = round(self.num * after_or_fraction)
424 n = min(n, self.num - 1)
425 n = max(1, n)
426 else:
427 n = int(after_or_fraction)
429 if n < 1 or n >= self.num:
430 raise ValueError("Partitioning would leave one partition empty.")
432 A = self.__class__.__new__(self.__class__)
433 B = self.__class__.__new__(self.__class__)
435 if self._cached_cvx_mat is not None:
436 A._cached_cvx_mat = self._cached_cvx_mat[:, :n]
437 B._cached_cvx_mat = self._cached_cvx_mat[:, n:]
439 if self._cached_cvx_vec is not None:
440 A._cached_cvx_vec = self._cached_cvx_vec[:n]
441 B._cached_cvx_vec = self._cached_cvx_vec[n:]
443 if self._cached_pic_mat is not None:
444 A._cached_pic_mat = self._cached_pic_mat[:, :n]
445 B._cached_pic_mat = self._cached_pic_mat[:, n:]
447 if self._cached_pic_vec is not None:
448 A._cached_pic_vec = self._cached_pic_vec[:n]
449 B._cached_pic_vec = self._cached_pic_vec[n:]
451 A._original_shape = self._original_shape
452 B._original_shape = self._original_shape
454 return A, B
456 def kfold(self, k):
457 r"""Perform :math:`k`-fold cross-validation (without shuffling).
459 If random shuffling is desired, write ``S.shuffled().kfold(k)`` where
460 ``S`` is your :class:`Samples` instance. To make the shuffling
461 reproducible, see :meth:`shuffled`.
463 :returns list(tuple):
464 A list of :math:`k` training set and validation set pairs.
466 .. warning::
468 If the number of samples :math:`n` is not a multiple of :math:`k`,
469 then the last :math:`n \bmod k` samples will appear in every
470 training but in no validation set.
472 :Example:
474 >>> from picos.expressions import Samples
475 >>> n, k = 7, 3
476 >>> S = Samples(range(n))
477 >>> for i, (T, V) in enumerate(S.kfold(k)):
478 ... print("Partition {}:\nT = {}V = {}"
479 ... .format(i + 1, T.matrix, V.matrix))
480 Partition 1:
481 T = [ 2.00e+00 3.00e+00 4.00e+00 5.00e+00 6.00e+00]
482 V = [ 0.00e+00 1.00e+00]
483 <BLANKLINE>
484 Partition 2:
485 T = [ 0.00e+00 1.00e+00 4.00e+00 5.00e+00 6.00e+00]
486 V = [ 2.00e+00 3.00e+00]
487 <BLANKLINE>
488 Partition 3:
489 T = [ 0.00e+00 1.00e+00 2.00e+00 3.00e+00 6.00e+00]
490 V = [ 4.00e+00 5.00e+00]
491 <BLANKLINE>
493 """
494 if not isinstance(k, int):
495 raise TypeError("k must be an integer.")
497 if k < 2:
498 raise ValueError("k must be at least two.")
500 if k > self.num:
501 raise ValueError("k must not exceed the number of samples.")
503 n = self.num // k
505 assert n >= 1 and n < self.num
507 fold = []
508 indices = list(range(self.num))
510 for i in range(k):
511 t = indices[:i*n] + indices[(i+1)*n:]
512 v = indices[i*n:(i+1)*n]
514 T = self.__class__.__new__(self.__class__)
515 V = self.__class__.__new__(self.__class__)
517 if self._cached_cvx_mat is not None:
518 T._cached_cvx_mat = self._cached_cvx_mat[:, t]
519 V._cached_cvx_mat = self._cached_cvx_mat[:, v]
521 if self._cached_cvx_vec is not None:
522 T._cached_cvx_vec = tuple(self._cached_cvx_vec[i] for i in t)
523 V._cached_cvx_vec = tuple(self._cached_cvx_vec[i] for i in v)
525 if self._cached_pic_mat is not None:
526 T._cached_pic_mat = self._cached_pic_mat[:, t]
527 V._cached_pic_mat = self._cached_pic_mat[:, v]
529 if self._cached_pic_vec is not None:
530 T._cached_pic_vec = tuple(self._cached_pic_vec[i] for i in t)
531 V._cached_pic_vec = tuple(self._cached_pic_vec[i] for i in v)
533 fold.append((T, V))
535 return fold
537 def select(self, indices):
538 """Return a new :class:`Samples` instance with only selected samples.
540 :param indices:
541 The indices of the samples to select.
542 """
543 indices = list(indices)
545 S = self.__class__.__new__(self.__class__)
547 if self._cached_cvx_mat is not None:
548 S._cached_cvx_mat = self._cached_cvx_mat[:, indices]
550 if self._cached_cvx_vec is not None:
551 S._cached_cvx_vec = tuple(self._cached_cvx_vec[i] for i in indices)
553 if self._cached_pic_mat is not None:
554 S._cached_pic_mat = self._cached_pic_mat[:, indices]
556 if self._cached_pic_vec is not None:
557 S._cached_pic_vec = tuple(self._cached_pic_vec[i] for i in indices)
559 if len(S) == 0:
560 raise ValueError("Empty susbet of samples selected.")
562 S._original_shape = self._original_shape
563 return S
566# --------------------------------------
567__all__ = api_end(_API_START, globals())