import numpy as np
# Declare a vector using a list as the argument; the single float entry
# (2.0) makes NumPy store every element as a float
v = np.array([1, 2.0, 3, 4])
v
array([1., 2., 3., 4.])
numpy
ndarray
to linalg
January 23, 2024
NumPy
is the fundamental package for scientific computing with Python. It contains among other things:
Besides its obvious scientific uses, NumPy can also be used as an efficient multi-dimensional container of generic data. Arbitrary data-types can be defined. This allows NumPy to seamlessly and speedily integrate with a wide variety of databases.
Library documentation: http://www.numpy.org/
numpy.array
object
array([1., 2., 3., 4.])
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])
array([ 0, -3, -12, -27, -48, -75, -108, -147,
-192, -243, -300, -363, -432, -507, -588, -675,
-768, -867, -972, -1083, -1200, -1323, -1452, -1587,
-1728, -1875, -2028, -2187, -2352, -2523, -2700, -2883,
-3072, -3267, -3468, -3675, -3888, -4107, -4332, -4563,
-4800, -5043, -5292, -5547, -5808, -6075, -6348, -6627,
-6912, -7203, -7500, -7803, -8112, -8427, -8748, -9075,
-9408, -9747, -10092, -10443, -10800, -11163, -11532, -11907,
-12288, -12675, -13068, -13467, -13872, -14283, -14700, -15123,
-15552, -15987, -16428, -16875, -17328, -17787, -18252, -18723,
-19200, -19683, -20172, -20667, -21168, -21675, -22188, -22707,
-23232, -23763, -24300, -24843, -25392, -25947, -26508, -27075,
-27648, -28227, -28812, -29403])
{0: 0,
1: 1,
2: 2,
3: 3,
4: 4,
5: 5,
6: 6,
7: 7,
8: 8,
9: 9,
10: 10,
11: 11,
12: 12,
13: 13,
14: 14,
15: 15,
16: 16,
17: 17,
18: 18,
19: 19,
20: 20,
21: 21,
22: 22,
23: 23,
24: 24,
25: 25,
26: 26,
27: 27,
28: 28,
29: 29,
30: 30,
31: 31,
32: 32,
33: 33,
34: 34,
35: 35,
36: 36,
37: 37,
38: 38,
39: 39,
40: 40,
41: 41,
42: 1025,
43: 43,
44: 44,
45: 45,
46: 46,
47: 47,
48: 48,
49: 49,
50: 50,
51: 51,
52: 52,
53: 53,
54: 54,
55: 55,
56: 56,
57: 57,
58: 58,
59: 59,
60: 60,
61: 61,
62: 62,
63: 63,
64: 64,
65: 65,
66: 66,
67: 67,
68: 68,
69: 69,
70: 70,
71: 71,
72: 72,
73: 73,
74: 74,
75: 75,
76: 76,
77: 77,
78: 78,
79: 79,
80: 80,
81: 81,
82: 82,
83: 83,
84: 84,
85: 85,
86: 86,
87: 87,
88: 88,
89: 89,
90: 90,
91: 91,
92: 92,
93: 93,
94: 94,
95: 95,
96: 96,
97: 97,
98: 98,
99: 99}
array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 41, 42, 1026, 44,
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66,
67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88,
89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
100])
array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 41, 42, 1026, 44,
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66,
67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88,
89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
100])
array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 41, 42, 1026, 44,
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66,
67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88,
89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
100])
array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 41, 42, 1026, 44,
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66,
67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88,
89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
100])
Warning
Beware of the dimensions: a 1D array is not the same as a 2D array with 1 column
# np.array([seq]) wraps seq in an extra dimension (a single row when seq is
# 1D -- presumably the case for a1 and a2, defined earlier); .T turns that
# row into a column. Column vector multiplied by row vector.
np.array([a2]).T.dot(np.array([a1]))
array([[1, 2, 3],
[2, 4, 6],
[3, 6, 9]])
# Build a 2D array from a nested list: each inner list becomes one row.
# The float entries of the last row make the whole array floating point.
M = np.array([[1, 2], [3, 4], [3.14, -9.17]])
M
array([[ 1. , 2. ],
[ 3. , 4. ],
[ 3.14, -9.17]])
array([[ 0, 1, 2],
[ 3, 4, 5],
[ 6, 7, 8],
[ 9, 10, 11]])
array([ 0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8, 2. ,
2.2, 2.4, 2.6, 2.8, 3. , 3.2, 3.4, 3.6, 3.8, 4. , 4.2,
4.4, 4.6, 4.8, 5. , 5.2, 5.4, 5.6, 5.8, 6. , 6.2, 6.4,
6.6, 6.8, 7. , 7.2, 7.4, 7.6, 7.8, 8. , 8.2, 8.4, 8.6,
8.8, 9. , 9.2, 9.4, 9.6, 9.8, 10. ])
(array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
2.98095799e+03, 8.10308393e+03, 2.20264658e+04]),
array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
2.98095799e+03, 8.10308393e+03, 2.20264658e+04]))
import matplotlib.pyplot as plt
# Draw 1000 random standard Gaussian numbers and accumulate them, so that
# bm[k] is the sum of the first k + 1 draws (a random walk)
wn = np.random.randn(1000)
bm = np.cumsum(wn)
# Plot the accumulated path with a thick line on a wide 8 x 4 figure
fig = plt.figure(figsize=(8, 4))
plt.plot(bm, linewidth=3)
array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 2, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 3, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 4, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 5, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 6, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 7, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 8, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 9]])
array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)
# Slicing uses the same start:stop:step syntax as lists and other sequences
A = np.array([1, 2, 3, 4, 5])
# One line per view: the full array, reversed, every other element, and
# every other element excluding the last one
print(A, A[::-1], A[::2], A[:-1:2], sep='\n')
[1 2 3 4 5]
[5 4 3 2 1]
[1 3 5]
[1 3]
[[0, 1, 2, 3, 4],
[10, 11, 12, 13, 14],
[20, 21, 22, 23, 24],
[30, 31, 32, 33, 34],
[40, 41, 42, 43, 44]]
array([[ 0, 1, 2, 3, 4],
[10, 11, 12, 13, 14],
[20, 21, 22, 23, 24],
[30, 31, 32, 33, 34],
[40, 41, 42, 43, 44]])
array([[ 0, 1, 2, 3, 4],
[ 10, 11, 123, 13, 14],
[ 20, 21, 22, 23, 24],
[ 30, 31, 32, 33, 34],
[ 40, 41, 42, 43, 44]])
array([[ 4, 3, 2, 1, 0],
[ 14, 13, 123, 11, 10],
[ 24, 23, 22, 21, 20],
[ 34, 33, 32, 31, 30],
[ 44, 43, 42, 41, 40]])
[[ 10 11 123 13 14]
[ 20 21 22 23 24]
[ 40 41 42 43 44]]
Another way is through masking with an array of `bool`s
Don’t forget that Python does not make copies unless told to do so (same as with any mutable type).
If you are not careful enough, this typically leads to a lot of errors and to being fired!
To put the values of `x` in `y` (copy values into an existing array), use `y[:] = x`
(array([ 0.22882628, 1.01836679, 1.02519228, -0.21674823, 0.77187089,
-0.07460457, 1.17871761, 0.00135803, 1.06703629, 0.57036614]),
139656462113488)
(array([2.78, 2.78, 2.78, 2.78, 2.78, 2.78, 2.78, 2.78, 2.78, 2.78]),
139656462113488)
(array([3.14, 3.14, 3.14, 3.14, 3.14, 3.14, 3.14, 3.14, 3.14, 3.14]),
139656462113488)
(array([-0.33509421, 0.6674441 , 0.80431907, 1.23374741, 1.29200237,
-0.72160291, 0.04892273, 0.70665894, -2.40541469, -0.92614552]),
139656462113488)
(array([0.33509421, 0.6674441 , 0.80431907, 1.23374741, 1.29200237,
0.72160291, 0.04892273, 0.70665894, 2.40541469, 0.92614552]),
139656460714608)
(array([-0.33509421, 0.6674441 , 0.80431907, 1.23374741, 1.29200237,
-0.72160291, 0.04892273, 0.70665894, -2.40541469, -0.92614552]),
139656462113488,
139656462113488,
True)
Final warning
In the next line, you copy the values of `x` into an existing array `y` (of the same size…)
(array([-0.33509421, 0.6674441 , 0.80431907, 1.23374741, 1.29200237,
-0.72160291, 0.04892273, 0.70665894, -2.40541469, -0.92614552]),
False,
True)
While in the next line, you are aliasing: you are giving a new name `y` to the object named `x` (you should never, ever write something like this)
A `numpy` array can contain things other than numeric types
(array(['ou', 'là', 'là', ",ç'est", 'dur', 'python'], dtype='<U6'),
(6,),
dtype('<U6'))
numpy
So far, we have only used `array` or `ndarray` objects.
There is another type: the `matrix` type.
In short: don’t use it (IMHO) and stick with arrays.
(matrix([[0, 1, 2]]), matrix([[0, 1, 2]]))
(matrix([[0, 0, 0],
[0, 1, 2],
[0, 2, 4]]),
(1, 3),
matrix([[0, 0, 0],
[0, 1, 2],
[0, 2, 4]]))
array([[1, 0, 0, 1],
[1, 0, 0, 1],
[0, 0, 0, 2],
[1, 1, 0, 0]])
(0, 0) 1
(0, 3) 1
(1, 0) 1
(1, 3) 1
(2, 3) 2
(3, 0) 1
(3, 1) 1
<4x4 sparse matrix of type '<class 'numpy.int64'>'
with 7 stored elements in COOrdinate format>
# Show X, then the .data, .row and .col attributes of X_coo, each followed
# by a dashed separator line
print(X, X_coo.data, X_coo.row, X_coo.col, sep='\n----\n', end='\n----\n')
[[1 0 0 1]
[1 0 0 1]
[0 0 0 2]
[1 1 0 0]]
----
[1 1 1 1 2 1 1]
----
[0 0 1 1 2 3 3]
----
[0 3 0 3 3 0 1]
----
There are also:
- `csr_matrix`: sparse rows format
- `csc_matrix`: sparse columns format

Sparse rows format is often used for machine learning: sparse feature vectors.
But the sparse columns format is useful as well (e.g. coordinate gradient descent).
array([[ 1.90961279, 0.45025904, 0.93362367, 2.35155649, 0.58194797],
[ 0.63258277, -0.088729 , 0.63493499, 0.22867402, 0.15696287],
[-1.45274264, -0.7438334 , -0.26355842, 0.89366631, -0.95255453],
[-0.18452788, -1.49901081, -1.45384679, 0.1891478 , -0.24027981],
[-1.08846102, -1.19436781, 0.72603139, 0.24786334, -0.6253346 ]])
# Numbers displayed by numpy inside this block use 3 decimals at most.
# np.printoptions is a context manager: the previous print options are
# restored on exit (even if printing raises), so the precision change does
# not leak into the rest of the kernel and no default has to be hard-coded.
with np.printoptions(precision=3):
    print(X)
[[ 1.91 0.45 0.934 2.352 0.582]
[ 0.633 -0.089 0.635 0.229 0.157]
[-1.453 -0.744 -0.264 0.894 -0.953]
[-0.185 -1.499 -1.454 0.189 -0.24 ]
[-1.088 -1.194 0.726 0.248 -0.625]]
`numpy` arrays can have any number of dimensions (hence the name `ndarray`)
array([[[ 0, 1, 2],
[ 3, 4, 5]],
[[ 6, 7, 8],
[ 9, 10, 11]],
[[12, 13, 14],
[15, 16, 17]]])
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23],
[24, 25, 26, 27, 28, 29],
[30, 31, 32, 33, 34, 35],
[36, 37, 38, 39, 40, 41]])
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 8, 10, 12, 14, 16],
[ 18, 21, 24, 27, 30, 33],
[ 36, 40, 44, 48, 52, 56],
[ 60, 65, 70, 75, 80, 85],
[ 90, 96, 102, 108, 114, 120],
[126, 133, 140, 147, 154, 161]])
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23],
[24, 25, 26, 27, 28, 29],
[30, 31, 32, 33, 34, 35],
[36, 37, 38, 39, 40, 41]])
array([[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14],
[15, 16, 17, 18, 19],
[20, 21, 22, 23, 24],
[25, 26, 27, 28, 29]])
array([[ 0, 0, 0, 0, 0],
[ 5, 6, 7, 8, 9],
[10, 12, 14, 16, 18],
[15, 18, 21, 24, 27],
[20, 24, 28, 32, 36]])
# Inner product between vectors, written as a method call on the first operand
print(v1.dot(v2))
# The same product through the module-level function np.dot
# (you can also use it, but the method form above is preferred)
print(np.dot(v1, v2))
80
80
(array([[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14],
[15, 16, 17, 18, 19],
[20, 21, 22, 23, 24],
[25, 26, 27, 28, 29]]),
array([0, 1, 2, 3, 4]))
array([[ 0, 5, 10, 15, 20, 25],
[ 1, 6, 11, 16, 21, 26],
[ 2, 7, 12, 17, 22, 27],
[ 3, 8, 13, 18, 23, 28],
[ 4, 9, 14, 19, 24, 29]])
[[42 2 3]
[ 4 5 6]
[ 7 8 9]]
[1 2 3]
array([2.18366847e-18, 2.31698718e-16, 3.33333333e-01])
Solving a system of linear equations can be done in many different ways, depending on the rank of matrix `A`.
- `A` is square and invertible, … choose \(A^{-1} \times b\)
- `A` is not invertible
  - `A` has full column rank, return the unique minimizer of \(\| A z - b \|^2\)
  - `A` does not have full column rank, return a minimizer of \(\| A z - b \|^2\) with minimal \(\ell_2\) norm.

array([ 1.3153937 , 0.55932771, -0.33618987])
array([[-0.58326142, -0.82699802, 0.18630866],
[-0.36228081, 0.32274286, -0.77541192],
[-0.72702045, 0.46033826, 0.60334521]])
Decomposes any real matrix \(A \in \mathbb R^{m \times n}\) as follows: \[ A = U \times S \times V^\top \] where
- \(U\) and \(V\) are orthogonal matrices (meaning that \(U^\top \times U = I\) and \(V^\top \times V = I\))
- \(S\) is a diagonal matrix that contains the singular values in decreasing order
[[0.82562092 0.42146713 0.1829056 ]
[0.10875482 0.09517153 0.52079772]
[0.35551885 0.82766939 0.61773909]]
Note that the above line implements unpacking
array([[0.82562092, 0.42146713, 0.1829056 ],
[0.10875482, 0.09517153, 0.52079772],
[0.35551885, 0.82766939, 0.61773909]])
array([[-4.44089210e-16, 2.22044605e-16, -5.55111512e-17],
[-1.24900090e-16, -1.94289029e-16, 0.00000000e+00],
[-3.33066907e-16, -3.33066907e-16, -2.22044605e-16]])
# U and V are indeed orthonormal: both Gram matrices print as the identity
# up to rounding error. The 2-decimal precision is scoped to this block by
# the np.printoptions context manager, which restores the previous print
# options on exit instead of resetting them to a hard-coded value.
with np.printoptions(precision=2):
    print(U.T.dot(U), V.T.dot(V), sep=2 * '\n')
[[ 1.00e+00 -1.67e-16 -1.11e-16]
[-1.67e-16 1.00e+00 -3.61e-16]
[-1.11e-16 -3.61e-16 1.00e+00]]
[[1.00e+00 3.33e-16 1.11e-16]
[3.33e-16 1.00e+00 2.22e-16]
[1.11e-16 2.22e-16 1.00e+00]]
scipy.misc.face()
compute_approx(X, r)
def compute_approx(X: np.ndarray, r: int) -> np.ndarray:
    """Compute the best rank-r approximation of X using SVD.

    We expect X to be the 3D array corresponding to a color image, that we
    reduce to a 2D one of shape (n_rows, n_cols * n_channels) to apply a
    single SVD (no broadcasting).

    Parameters
    ----------
    X : `np.ndarray`, shape=(n_rows, n_cols, 3)
        The input 3D ndarray

    r : `int`
        The desired rank

    Return
    ------
    output : `np.ndarray`, shape=(n_rows, n_cols, 3)
        The rank-r approximation of X, rounded to the nearest integer and
        clipped to the range [0, 255]
    """
    n_rows, n_cols, n_channels = X.shape
    # Reshape X to a 2D array
    X_reshape = X.reshape(n_rows, n_cols * n_channels)
    # Compute the reduced SVD; the third output is already V transposed
    U, S, Vt = np.linalg.svd(X_reshape, full_matrices=False)
    # Keep only the r leading singular triplets. Scaling the columns of U by
    # the singular values avoids building a dense diagonal matrix and
    # multiplying by components that would be zeroed out anyway.
    X_reshape_r = (U[:, :r] * S[:r]).dot(Vt[:r])
    # Round to the nearest integer (a plain cast would truncate toward zero
    # and turn e.g. 99.9999 into 99), put the values between 0 and 255 again
    # and cast to integer type
    return (
        np.rint(X_reshape_r)
        .clip(min=0, max=255)
        .astype('int')
        .reshape(n_rows, n_cols, n_channels)
    )