Library: NumPy

Installation

In [1]:
#conda install numpy
#pip install numpy

import numpy as np

NumPy Arrays

  • A homogeneous container of numerical elements, all of a single type.

  • Arrays can have any number of dimensions; the most common are 1 dimension (similar to a Python list) and 2 dimensions (similar to a nested Python list, i.e. a matrix).

  • Axes are defined for arrays with more than one dimension: the first running vertically downwards across rows (axis 0), and the second running horizontally across columns (axis 1).

Creating NumPy Arrays

In [2]:
#From a Python list

my_list = [1,2,3]
my_list
Out[2]:
[1, 2, 3]
In [3]:
arr = np.array(my_list)
arr
Out[3]:
array([1, 2, 3])
In [4]:
#From a nested Python list (a list of rows)

my_matrix = [[1,2,3],[4,5,6],[7,8,9]]
my_matrix
Out[4]:
[[1, 2, 3], [4, 5, 6], [7, 8, 9]]
In [5]:
mat = np.array(my_matrix)
mat
Out[5]:
array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

Array

np.array(object [, dtype=None, copy=True, order='K', subok=False, ndmin=0])

In [6]:
#create a 1-dimensional array
arr = np.array([2,4,6]) 
arr
Out[6]:
array([2, 4, 6])
In [7]:
#create a 2-dimensional array (matrix)
mat = np.array([[2,4,6], [8,10,12], [14,16,18]]) 
mat
Out[7]:
array([[ 2,  4,  6],
       [ 8, 10, 12],
       [14, 16, 18]])

zeros and ones

In [8]:
#generates arrays of zeros
arr0 = np.zeros(3)
mat0 = np.zeros((5,5))
mat0
Out[8]:
array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])
In [9]:
#generates arrays of ones
arr1 = np.ones(3)
mat1 = np.ones((3,3))
mat1
Out[9]:
array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

arange array

np.arange([start, ]stop, [step, ]dtype=None)

In [10]:
#create sequences of numbers
arr = np.arange(0,10, 2, int ) #np.arange([start,] stop [, step, dtype])
arr
Out[10]:
array([0, 2, 4, 6, 8])
In [11]:
arr = np.arange( 0, 2, 0.3 ) #accepts float
arr
Out[11]:
array([0. , 0.3, 0.6, 0.9, 1.2, 1.5, 1.8])
In [12]:
arr = np.arange(0,12).reshape(3,4) #can be reshaped in rows and columns
arr
Out[12]:
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

linspace

np.linspace(start, stop, num=50[, endpoint=True, retstep=False, dtype=None, axis=0])

In [13]:
#Return evenly spaced numbers over a specified interval
np.linspace(0,10,21)
Out[13]:
array([ 0. ,  0.5,  1. ,  1.5,  2. ,  2.5,  3. ,  3.5,  4. ,  4.5,  5. ,
        5.5,  6. ,  6.5,  7. ,  7.5,  8. ,  8.5,  9. ,  9.5, 10. ])

eye

In [14]:
#creates an identity matrix
np.eye(4)
Out[14]:
array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

Random

In [15]:
#Create an array of the given shape and populate it with random samples from a uniform distribution over [0, 1) (0 included, 1 excluded).
np.random.rand(2) #1-D array of 2 samples (not displayed: a cell only shows its last expression)
np.random.rand(5,5) #5x5 array of samples
Out[15]:
array([[0.9797668 , 0.94586434, 0.36524117, 0.19784975, 0.15176004],
       [0.09277052, 0.90938651, 0.52618724, 0.62981716, 0.5111796 ],
       [0.96197826, 0.48489467, 0.14034696, 0.69334539, 0.33582117],
       [0.69485323, 0.93141817, 0.03266454, 0.1680806 , 0.49473144],
       [0.19240705, 0.50925566, 0.07623005, 0.34016356, 0.66179273]])
In [16]:
#Return a sample (or samples) from the "standard normal" distribution (mean 0, standard deviation 1), unlike rand, which is uniform:
np.random.randn(2) #1-D array of 2 samples (not displayed: a cell only shows its last expression)
np.random.randn(5,5) #5x5 array of samples
Out[16]:
array([[-1.58666343,  0.85040663, -0.27919152,  0.01841439,  0.75435485],
       [ 0.89726612,  0.69130702, -1.13555545, -0.66225676,  0.70258642],
       [ 1.44323687, -0.34104132,  0.83226325, -1.37563275,  2.36230149],
       [-0.20648663,  1.00273908, -0.56658769, -1.49940631,  2.05278195],
       [-0.80371781,  0.59568719, -1.63747353, -1.14329534, -0.69771396]])
In [17]:
#Return random integers from `low` (inclusive) to `high` (exclusive).
np.random.randint(1,100) #a single integer (not displayed: a cell only shows its last expression)
np.random.randint(1,100,10) #array of 10 integers
Out[17]:
array([76,  9, 67, 24, 16, 89, 54, 23, 31, 87])

Array Attributes and Methods

In [18]:
arr = np.arange(25)
arr #show
Out[18]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24])
In [19]:
ranarr = np.random.randint(0,50,10)
ranarr #show
Out[19]:
array([11, 49, 21,  5, 45, 32, 36, 41, 16, 28])
In [20]:
ranarr.reshape(5,2) #returns the same values arranged as 5 rows x 2 columns; ranarr itself keeps its original shape
Out[20]:
array([[11, 49],
       [21,  5],
       [45, 32],
       [36, 41],
       [16, 28]])
In [21]:
ranarr.max() #max value
Out[21]:
49
In [22]:
ranarr.argmax() #max value position
Out[22]:
1
In [23]:
ranarr.min()#min value
Out[23]:
5
In [24]:
ranarr.argmin() #min value position
Out[24]:
3
In [25]:
ranarr.ndim #number of axes(dimensions)
Out[25]:
1
In [26]:
ranarr.size #number of elements
Out[26]:
10
In [27]:
ranarr.shape #size of the array along each dimension
Out[27]:
(10,)
In [28]:
ranarr.dtype #data type
Out[28]:
dtype('int32')
In [29]:
ranarr.itemsize #size in bytes of a single element
Out[29]:
4
In [30]:
ranarr.reshape(5,2).shape
Out[30]:
(5, 2)

Indexing and Selection

Bracket Indexing and Selection

In [31]:
#Get a value at an index
arr[8]
Out[31]:
8
In [32]:
#Get values in a range (start included, end excluded)
arr[1:5]
Out[32]:
array([1, 2, 3, 4])
In [33]:
#Aliases and copies
arr1 = arr #plain assignment creates an alias: arr1 is another name for the same array object
arr2 = arr.copy() #copy() has to be called; without the parentheses arr2 would be the bound method, not an array
arr is arr1 #True, same object (not displayed: a cell only shows its last expression)
arr is arr2 #False, arr2 is a separate array object holding the same values
Out[33]:
False

Indexing a 2D array (matrices)

In [34]:
#Creating 2D array
arr_2d = np.array(([5,10,15],[20,25,30],[35,40,45]))
arr_2d
Out[34]:
array([[ 5, 10, 15],
       [20, 25, 30],
       [35, 40, 45]])
In [35]:
#Indexing rows
arr_2d[1] #one row
arr_2d[[0,2]] #list of rows
Out[35]:
array([[ 5, 10, 15],
       [35, 40, 45]])
In [36]:
#Getting individual element value
arr_2d[1, 0] #row 1, column 0 in a single indexing operation (arr_2d[1][0] gives the same value but first builds a temporary row array)
Out[36]:
20
In [37]:
#Getting multiple element values
arr_2d[:2,1:] #rows 0 and 1 (the first two rows), columns from index 1 onward
Out[37]:
array([[10, 15],
       [25, 30]])
In [38]:
#Filter
arr = np.arange(1,11)
arr > 4 #element-wise comparison, yields a boolean array (not displayed: a cell only shows its last expression)
bool_arr = arr>4 #boolean mask: True where the condition holds
arr[bool_arr] #keeps only the elements where the mask is True
Out[38]:
array([ 5,  6,  7,  8,  9, 10])
In [39]:
arr[arr>4] #direct filter
Out[39]:
array([ 5,  6,  7,  8,  9, 10])
In [40]:
x = 4 #threshold used in the filter condition
arr[arr>x] #filter using the threshold variable
Out[40]:
array([ 5,  6,  7,  8,  9, 10])

NumPy Operations

In [41]:
arr = np.arange(1,13).reshape(3,4)
print(arr)
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]

Basic Operations

In [42]:
arr + arr #sum
Out[42]:
array([[ 2,  4,  6,  8],
       [10, 12, 14, 16],
       [18, 20, 22, 24]])
In [43]:
arr - arr #subtraction
Out[43]:
array([[0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0]])
In [44]:
arr * arr #product
Out[44]:
array([[  1,   4,   9,  16],
       [ 25,  36,  49,  64],
       [ 81, 100, 121, 144]])
In [45]:
arr/arr #division
Out[45]:
array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])
In [46]:
1/arr #element-wise reciprocal
Out[46]:
array([[1.        , 0.5       , 0.33333333, 0.25      ],
       [0.2       , 0.16666667, 0.14285714, 0.125     ],
       [0.11111111, 0.1       , 0.09090909, 0.08333333]])
In [47]:
arr**2 #power
Out[47]:
array([[  1,   4,   9,  16],
       [ 25,  36,  49,  64],
       [ 81, 100, 121, 144]], dtype=int32)
In [48]:
np.sqrt(arr) #square root
Out[48]:
array([[1.        , 1.41421356, 1.73205081, 2.        ],
       [2.23606798, 2.44948974, 2.64575131, 2.82842712],
       [3.        , 3.16227766, 3.31662479, 3.46410162]])
In [49]:
np.exp(arr) #element-wise exponential (e**x)
Out[49]:
array([[2.71828183e+00, 7.38905610e+00, 2.00855369e+01, 5.45981500e+01],
       [1.48413159e+02, 4.03428793e+02, 1.09663316e+03, 2.98095799e+03],
       [8.10308393e+03, 2.20264658e+04, 5.98741417e+04, 1.62754791e+05]])
In [50]:
np.sin(arr) #element-wise sine (values are interpreted as radians)
Out[50]:
array([[ 0.84147098,  0.90929743,  0.14112001, -0.7568025 ],
       [-0.95892427, -0.2794155 ,  0.6569866 ,  0.98935825],
       [ 0.41211849, -0.54402111, -0.99999021, -0.53657292]])
In [51]:
np.log(arr) #element-wise natural logarithm (base e)
Out[51]:
array([[0.        , 0.69314718, 1.09861229, 1.38629436],
       [1.60943791, 1.79175947, 1.94591015, 2.07944154],
       [2.19722458, 2.30258509, 2.39789527, 2.48490665]])
In [52]:
np.sum(arr) #total sum
Out[52]:
78
In [53]:
np.sum(arr, axis = 0) #column sum
Out[53]:
array([15, 18, 21, 24])
In [54]:
np.sum(arr, axis = 1) #row sum
Out[54]:
array([10, 26, 42])
In [55]:
np.cumsum(arr, axis = 0) #cumulative column sum
Out[55]:
array([[ 1,  2,  3,  4],
       [ 6,  8, 10, 12],
       [15, 18, 21, 24]], dtype=int32)
In [56]:
np.cumsum(arr, axis = 1) #cumulative row sum
Out[56]:
array([[ 1,  3,  6, 10],
       [ 5, 11, 18, 26],
       [ 9, 19, 30, 42]], dtype=int32)
In [57]:
np.diff(arr) #difference between each element and the previous one along the last axis (here: within each row)
Out[57]:
array([[1, 1, 1],
       [1, 1, 1],
       [1, 1, 1]])
In [58]:
np.prod(arr) #total product
Out[58]:
479001600
In [59]:
np.cumprod(arr) #cumulative product over the flattened array (no axis given)
Out[59]:
array([        1,         2,         6,        24,       120,       720,
            5040,     40320,    362880,   3628800,  39916800, 479001600],
      dtype=int32)

Basic Statistics

In [60]:
np.max(arr) #maximum
Out[60]:
12
In [61]:
np.min(arr) #minimum
Out[61]:
1
In [62]:
np.average(arr) #average
Out[62]:
6.5
In [63]:
np.mean(arr) #mean
Out[63]:
6.5
In [64]:
np.median(arr) #median
Out[64]:
6.5
In [65]:
np.var(arr) #variance of all elements (population variance: divides by N, not N-1)
Out[65]:
11.916666666666666
In [66]:
np.std(arr) #standard deviation of all elements (square root of the population variance)
Out[66]:
3.452052529534663
In [67]:
np.bincount(arr[0]) #number of occurrences of each integer from 0 up to the maximum value in the first row
Out[67]:
array([0, 1, 1, 1, 1], dtype=int64)
In [68]:
np.corrcoef(arr) #Pearson correlation coefficients between the rows (each row is treated as one variable)
Out[68]:
array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])
In [69]:
np.cov(arr) #covariance matrix between the rows (each row is one variable; divides by N-1 by default)
Out[69]:
array([[1.66666667, 1.66666667, 1.66666667],
       [1.66666667, 1.66666667, 1.66666667],
       [1.66666667, 1.66666667, 1.66666667]])

Rounding Methods

In [70]:
arrdem = np.arange(0,8,0.23)
arrdem
Out[70]:
array([0.  , 0.23, 0.46, 0.69, 0.92, 1.15, 1.38, 1.61, 1.84, 2.07, 2.3 ,
       2.53, 2.76, 2.99, 3.22, 3.45, 3.68, 3.91, 4.14, 4.37, 4.6 , 4.83,
       5.06, 5.29, 5.52, 5.75, 5.98, 6.21, 6.44, 6.67, 6.9 , 7.13, 7.36,
       7.59, 7.82])
In [71]:
np.rint(arrdem) #to the nearest integer
Out[71]:
array([0., 0., 0., 1., 1., 1., 1., 2., 2., 2., 2., 3., 3., 3., 3., 3., 4.,
       4., 4., 4., 5., 5., 5., 5., 6., 6., 6., 6., 6., 7., 7., 7., 7., 8.,
       8.])
In [72]:
np.round(arrdem, 1) #to the nearest number for the precision defined (decimals)
Out[72]:
array([0. , 0.2, 0.5, 0.7, 0.9, 1.2, 1.4, 1.6, 1.8, 2.1, 2.3, 2.5, 2.8,
       3. , 3.2, 3.4, 3.7, 3.9, 4.1, 4.4, 4.6, 4.8, 5.1, 5.3, 5.5, 5.8,
       6. , 6.2, 6.4, 6.7, 6.9, 7.1, 7.4, 7.6, 7.8])
In [73]:
np.ceil(arrdem) #rounds up to the nearest integer
Out[73]:
array([0., 1., 1., 1., 1., 2., 2., 2., 2., 3., 3., 3., 3., 3., 4., 4., 4.,
       4., 5., 5., 5., 5., 6., 6., 6., 6., 6., 7., 7., 7., 7., 8., 8., 8.,
       8.])
In [74]:
np.floor(arrdem) #rounds down to the nearest integer
Out[74]:
array([0., 0., 0., 0., 0., 1., 1., 1., 1., 2., 2., 2., 2., 2., 3., 3., 3.,
       3., 4., 4., 4., 4., 5., 5., 5., 5., 5., 6., 6., 6., 6., 7., 7., 7.,
       7.])
In [75]:
np.trunc(arrdem) #truncates to integer
Out[75]:
array([0., 0., 0., 0., 0., 1., 1., 1., 1., 2., 2., 2., 2., 2., 3., 3., 3.,
       3., 4., 4., 4., 4., 5., 5., 5., 5., 5., 6., 6., 6., 6., 7., 7., 7.,
       7.])
In [76]:
np.clip(arrdem, 2, 6) #clips lower and upper values according to the boundaries
Out[76]:
array([2.  , 2.  , 2.  , 2.  , 2.  , 2.  , 2.  , 2.  , 2.  , 2.07, 2.3 ,
       2.53, 2.76, 2.99, 3.22, 3.45, 3.68, 3.91, 4.14, 4.37, 4.6 , 4.83,
       5.06, 5.29, 5.52, 5.75, 5.98, 6.  , 6.  , 6.  , 6.  , 6.  , 6.  ,
       6.  , 6.  ])

Shape manipulation

In [77]:
np.sort(arr) #returns a copy sorted in ascending order along the last axis (here: within each row)
Out[77]:
array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])
In [78]:
-np.sort(-arr) #descending sort within each row: negate, sort ascending, then negate back
Out[78]:
array([[ 4,  3,  2,  1],
       [ 8,  7,  6,  5],
       [12, 11, 10,  9]])
In [79]:
np.ravel(arr) #flattens the values into a 1-dimensional array
Out[79]:
array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])
In [80]:
arr.T #transposes the matrix
Out[80]:
array([[ 1,  5,  9],
       [ 2,  6, 10],
       [ 3,  7, 11],
       [ 4,  8, 12]])
In [81]:
arr.resize(4,3) #changes the shape of arr in place to 4 rows x 3 columns (returns nothing, so arr is displayed below)
arr
Out[81]:
array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])
In [82]:
np.vstack((arr, arr)) #joins vertically (rows)
Out[82]:
array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12],
       [ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])
In [83]:
np.hstack((arr,arr)) #joins horizontally (columns)
Out[83]:
array([[ 1,  2,  3,  1,  2,  3],
       [ 4,  5,  6,  4,  5,  6],
       [ 7,  8,  9,  7,  8,  9],
       [10, 11, 12, 10, 11, 12]])
In [84]:
np.split(arr, 2) #splits the rows (axis 0) into 2 equal parts, returned as a list of arrays
Out[84]:
[array([[1, 2, 3],
        [4, 5, 6]]), array([[ 7,  8,  9],
        [10, 11, 12]])]
In [85]:
np.hsplit(arr, 3) #splits the columns (axis 1) into 3 equal parts, returned as a list of arrays
Out[85]:
[array([[ 1],
        [ 4],
        [ 7],
        [10]]), array([[ 2],
        [ 5],
        [ 8],
        [11]]), array([[ 3],
        [ 6],
        [ 9],
        [12]])]

Broadcasting

In [86]:
arr[0:3,0] = 100 #the scalar 100 is broadcast to rows 0-2 of column 0 (arr is modified in place)
arr
Out[86]:
array([[100,   2,   3],
       [100,   5,   6],
       [100,   8,   9],
       [ 10,  11,  12]])
In [87]:
slice_of_arr = arr[:2, :3] #first 2 rows, first 3 columns (basic slicing returns a view of arr, not a copy)
slice_of_arr
Out[87]:
array([[100,   2,   3],
       [100,   5,   6]])
In [88]:
arr[arr > 6] = 20 #boolean-mask assignment: every element greater than 6 is set to 20 (scalar broadcast)
arr
Out[88]:
array([[20,  2,  3],
       [20,  5,  6],
       [20, 20, 20],
       [20, 20, 20]])