An example
>>> import numpy as np >>> a = np.arange(15).reshape(3, 5) >>> a array([[ 0, 1, 2, 3, 4], [ 5, 6, 7, 8, 9], [10, 11, 12, 13, 14]]) >>> a.shape (3, 5) >>> a.ndim # The number of axes of an array. In the python world, the number of axes is called rank 2 >>> a.dtype.name 'int64' >>> a.itemsize # The byte size of each element in the array. 8 >>> a.size 15 >>> type(a) <type 'numpy.ndarray'>
random number
np.random.normal
Draws samples from a normal (Gaussian) distribution. With the default parameters (mean 0, standard deviation 1) this is the standard normal distribution, for which the area under the curve between -1.96 and +1.96 is 0.95
samples = np.random.normal(size=(4,4)) array([[ 1.84543397, 0.91407872, 0.83926424, 0.05128908], [ 0.89142489, 0.38926367, 0.22890831, -1.63944174], [ 0.50097211, 1.0324319 , 1.12945725, -0.66500686], [-0.13809704, 1.72104946, 0.4812083 , -2.35993971]]) np.random.normal(0,1,[3,2]) array([[-0.89197774, -0.23395082], [ 1.31110604, 1.25929054], [ 0.22351615, -0.38488545]])
np.random.uniform
The random number in [low,high) can be generated, which can be a single value, a one-dimensional array or a multi-dimensional array
np.random.uniform(1,5,(4,3)) #Generate 4x3 array array([[ 2.33083328, 1.592934 , 2.38072 ], [ 1.07485686, 4.93224857, 1.42584919], [ 3.2667912 , 4.57868281, 1.53218578], [ 4.17965117, 3.63912616, 2.83516143]])
np.random.randint
Generate [low,high) integer random number, which can be a single random number or an array of multi-dimensional random numbers
np.random.randint(4,10,size=(2,2),dtype='int32') array([[7, 4], [6, 9]]) # np.random.randn draws samples from the standard normal distribution (values are not limited to the range 0 to 1) np.random.randn(2,3) array([[ 0.59892845, 0.35535291, 0.53566027], [-0.09791823, 1.50595013, -0.5044485 ]])
np.random.rand
Returns an array of the given shape filled with floating-point numbers drawn uniformly from [0,1). If no arguments are given, a single random value in [0,1) is returned
np.random.rand(3,3) array([[ 0.47507608, 0.64225621, 0.9926529 ], [ 0.95028412, 0.18413813, 0.91879723], [ 0.89995217, 0.42356103, 0.81312942]])
Create matrix
arange
import numpy as np a = np.arange(10) # The default is from 0 to 10 (excluding 10) in steps of 1 print(a) # Prints [0 1 2 3 4 5 6 7 8 9] a1 = np.arange(5,10) # From 5 to 10 (excluding 10) in steps of 1 print(a1) # Prints [5 6 7 8 9] a2 = np.arange(5,20,2) # From 5 to 20 (excluding 20) in steps of 2 print(a2) # Prints [5 7 9 11 13 15 17 19]
linspace
import numpy as np # Similar to matlab a = np.linspace(0,10,7) # Generate an arithmetic sequence of 7 evenly spaced numbers from 0 to 10 (both endpoints included) # result [ 0. 1.66666667 3.33333333 5. 6.66666667 8.33333333 10.]
ones,zeros,eye,empty
ones creates a matrix filled with 1s, zeros creates a matrix filled with 0s, eye creates an identity matrix, and empty creates an uninitialized matrix (its entries are whatever values happen to be in the allocated memory)
ones all 1 matrix
a_ones = np.ones((3,4)) # Create 3 * 4 full 1 matrix # result [[ 1. 1. 1. 1.] [ 1. 1. 1. 1.] [ 1. 1. 1. 1.]] np.ones((2,3,4), dtype=np.int16 ) # dtype can also be specified # result array([[[ 1, 1, 1, 1], [ 1, 1, 1, 1], [ 1, 1, 1, 1]], [[ 1, 1, 1, 1], [ 1, 1, 1, 1], [ 1, 1, 1, 1]]], dtype=int16)
zeros all zero matrix
a_zeros = np.zeros((3,4)) # Create a 3 * 4 full 0 matrix print(a_zeros) # result [[ 0. 0. 0. 0.] [ 0. 0. 0. 0.] [ 0. 0. 0. 0.]]
eye identity matrix
a_eye = np.eye(3) # Create 3rd order identity matrix print(a_eye) # result [[ 1. 0. 0.] [ 0. 1. 0.] [ 0. 0. 1.]]
empty matrix
a_empty = np.empty((3,4)) # Create a 3 * 4 empty matrix print(a_empty) # result [[ 1.78006111e-306 -3.13259416e-294 4.71524461e-309 1.94927842e+289] [ 2.10230387e-309 5.42870216e+294 6.73606381e-310 3.82265219e-297] [ 6.24242356e-309 1.07034394e-296 2.12687797e+183 6.88703165e-315]]
Matrix operator
+ : element-wise addition of two matrices
- : element-wise subtraction of two matrices
* : element-wise multiplication of two matrices
/ : element-wise division of two matrices
import numpy as np a1 = np.array([[4,5,6],[1,2,3]]) a2 = np.array([[6,5,4],[3,2,1]]) print(a1+a2) # Element-wise addition # result [[10 10 10] [ 4 4 4]] print(a1/a2) # Element-wise true division (floating-point result) # result [[0.66666667 1. 1.5 ] [0.33333333 1. 3. ]] print(a1%a2) # Element-wise remainder # result [[4 0 2] [1 0 0]]
Matrix multiplication
import numpy as np a1 = np.array([[1,2,3],[4,5,6]]) # a1 is a 2 * 3 matrix a2 = np.array([[1,2],[3,4],[5,6]]) # a2 is a 3 * 2 matrix print(a1.dot(a2)) # result [[22 28] [49 64]]
Transpose of matrix a.T
import numpy as np a = np.array([[1,2,3],[4,5,6]]) print(a.transpose()) # Or print(a.T) # result [[1 4] [2 5] [3 6]]
Inverse of matrix
Let A be a square matrix of order n over a number field. If there exists another matrix B of order n over the same field such that AB = BA = E (where E is the identity matrix), then B is called the inverse matrix of A, and A is called an invertible matrix.
The inverse of the identity matrix is itself
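import numpy as np import numpy.linalg as lg a = np.array([[1,2,3],[4,5,6],[7,8,9]]) print(lg.inv(a)) # result [[ 3.15251974e+15 -6.30503948e+15 3.15251974e+15] [-6.30503948e+15 1.26100790e+16 -6.30503948e+15] [ 3.15251974e+15 -6.30503948e+15 3.15251974e+15]] # Note: this particular matrix is singular (its determinant is 0), so it has no true inverse; the huge values above are floating-point artifacts, and lg.inv may instead raise LinAlgError for such a matrix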
Maximum value
The functions of the maximum and minimum values of elements in the matrix are max and min respectively.
import numpy as np a = np.array([[1,2,3],[4,5,6]]) print(a.max()) #Get the maximum value of the whole matrix result: 6 print(a.min()) #Results: 1 # axis=0 reduces along the rows (down each column), that is, the maximum (or minimum) value of each column is obtained print(a.max(axis=0)) # The result is [4 5 6] # axis=1 reduces along the columns (across each row), that is, the maximum (or minimum) value of each row is obtained print(a.max(axis=1)) # The result is [3 6] # To get the positions of the maximum and minimum elements, use the argmax and argmin functions print(a.argmax(axis=1)) # The result is [2 2]
average value
import numpy as np a = np.array([[1,2,3],[4,5,6]]) print(a.mean()) #The result is: 3.5 # Similarly, the axis keyword parameter can be used to specify along which direction the average is computed print(a.mean(axis=0)) # Results [2.5 3.5 4.5] print(a.mean(axis=1)) # Results [2. 5.]
Variance correlation
The function of variance is var(), which is equivalent to the function mean(abs(x - x.mean())**2)
import numpy as np a = np.array([[1,2,3],[4,5,6]]) print(a.var()) # Result 2.91667 print(a.var(axis=0)) # Results [2.25 2.25 2.25] print(a.var(axis=1)) # Result [0.66666667 0.66666667]
The function of standard deviation is std(), which is equivalent to sqrt(mean(abs(x - x.mean())**2))
import numpy as np a = np.array([[1,2,3],[4,5,6]]) print(a.std()) # Result 1.70782512766 print(a.std(axis=0)) # Results [1.5 1.5 1.5] print(a.std(axis=1)) # Results [0.81649658 0.81649658]
median
Median refers to the value in the middle of the sequence in size order. If there are even numbers, it is the average of the two numbers in the middle. The function of the median is median()
import numpy as np x = np.array([[1,2,3],[4,5,6]]) print(np.median(x)) # Median all numbers # result 3.5 print(np.median(x,axis=0)) # Take the median value along the line direction # result [ 2.5 3.5 4.5] print(np.median(x,axis=1)) # Take the median along the column direction # result [ 2. 5.]
Sum
import numpy as np a = np.array([[1,2,3],[4,5,6]]) print(a.sum()) # Sum the entire matrix # Results 21 print(a.sum(axis=0)) # Sum row direction # Results [5, 7, 9] print(a.sum(axis=1)) # Sum column directions # Results [6 15]
Cumulative sum
The cumulative sum of a position refers to the sum of all elements before (including) the position. For example, the sequence [1,2,3,4,5] has a cumulative sum of [1,3,6,10,15]
import numpy as np a = np.array([[1,2,3],[4,5,6]]) print(a.cumsum()) # Cumulative sum of the whole matrix # Results [1 3 6 10 15 21] print(a.cumsum(axis=0)) # Cumulative sum of column directions # result [[1 2 3] [5 7 9]] print( a.cumsum(axis=1)) # Cumulative sum of row directions # result [[ 1 3 6] [ 4 9 15]]
range
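import numpy as np a = np.arange(100) a.ptp() # The result is 99 (maximum minus minimum); in NumPy 2.0 and later the ndarray.ptp method was removed, use np.ptp(a) instead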
Percentile
A percentile is a measure used in statistics: the q-th percentile is the value below which q percent of the observations fall.
a = np.array([[10, 7, 4], [3, 2, 1]]) np.percentile(a, 50) #The 50th percentile is the median of the sorted elements of a # The result is 3.5 np.percentile(a, 50, axis=0) array([6.5, 4.5, 2.5])
weighted average
import numpy as np a = np.arange(15).reshape(3,5) np.average(a, axis=0,weights=(10, 5, 1)) # a is [[ 0 1 2 3 4] [ 5 6 7 8 9] [10 11 12 13 14]] # First column: (0*10 + 5*5 + 10*1)/(10+5+1) = 35/16 = 2.1875 array([2.1875, 3.1875, 4.1875, 5.1875, 6.1875])
Shape manipulation
ravel(), flatten(), and squeeze() in numpy can all be used to reduce the number of dimensions of a multidimensional array. The differences are:
-
ravel(): returns a one-dimensional array; a copy of the source data is made only if necessary
-
flatten(): returns a copy of the source data
-
squeeze(): only removes axes whose length is 1
The difference between np.resize() and np.reshape()
-
Resize: a.resize((1,9)) directly modifies the original array in place, whereas np.resize(a,(2,3)) returns a new array of the requested shape (repeating the data if the new array is larger).
-
Reshape: returns the original array's data arranged in a new shape without modifying the original array. If the number of elements does not match the requested shape, an error is raised. np.reshape(a, (3,-1))
a = np.floor(10*np.random.random((3,4))) a array([[ 2., 8., 0., 6.], [ 4., 5., 1., 1.], [ 8., 9., 3., 6.]]) a.shape (3, 4) a.ravel() # Convert to one-dimensional array array([ 2., 8., 0., 6., 4., 5., 1., 1., 8., 9., 3., 6.]) a.reshape(6,2) # Returns the array with a new shape array([[ 2., 8.], [ 0., 6.], [ 4., 5.], [ 1., 1.], [ 8., 9.], [ 3., 6.]]) a.T # Transposed array array([[ 2., 4., 8.], [ 8., 5., 9.], [ 0., 1., 3.], [ 6., 1., 6.]]) a.T.shape (4, 3)
Fancy Indexing
Slice index: through [start: end: steps]
import numpy as np #Fancy Indexing x = np.arange(16) np.random.shuffle(x) print(x) #Print all elements #[14 1 7 10 11 15 9 4 2 13 12 0 3 8 5 6] print(x[2]) print(x[1:3]) #Slice, 1 to 2 print(x[3:9:2]) #Specify Spacing slice #result 7 [1 7] [10 15 4]
Boolean index: index through an array of Boolean types
The target array is indexed by a Boolean array to find the data in the target array corresponding to the value of True in the Boolean array. It should be noted that the length of the Boolean array must be consistent with the length of the axis corresponding to the target array.
import numpy as np names = np.array(['Bob', 'Joe', 'Will']) scores = np.random.randint(0, 100, (3, 4)) # 4 grades of 3 students #result array([[58, 52, 97, 76], [91, 86, 80, 78], [26, 35, 75, 78]]) print(names == 'Bob') [ True False False] print(scores[names == 'Bob']) # Get Bob's four subjects [[58 52 97 76]]
Fancy index: index by integer array
Fancy index takes the value of the index integer array as the subscript of an axis of the target array.
import numpy as np arr3d = np.arange(12).reshape(2, 2, 3) #Two arrays, each with 2 rows and 3 columns #result array([ [[ 0, 1, 2], [ 3, 4, 5]], [[ 6, 7, 8], [ 9, 10, 11]] ]) # Use two integer arrays to perform fancy indexing on axes 0 and 1 print(arr3d[[0, 1], [1, 1]]) #result [[ 3 4 5] [ 9 10 11]] print(arr3d[[0, 1], [0, 1, 2]]) # error: IndexError, because index arrays of lengths 2 and 3 cannot be broadcast together x = np.array([14, 1, 7, 10, 11, 15, 9, 4, 2, 13, 12, 0, 3, 8, 5, 6]) # the shuffled values from the slice-index example X = x.reshape(4,-1) print(X) #result [[14 1 7 10] [11 15 9 4] [ 2 13 12 0] [ 3 8 5 6]] ind1 = np.array([1,3]) #Row indices ind2 = np.array([2,0]) #Column indices print(X[ind1,ind2]) #Elements at positions (1,2) and (3,0): [9 3]