2 min readNov 21, 2021
Numpy
Post 03 — Sorting methods
import numpy as np
np.set_printoptions(suppress=True)
data = np.genfromtxt(r'./Numpy_Datasets/Lifecyclesavings.csv', delimiter=',', skip_header=1)
print(data.ndim)
print(data.shape)
print(data[:5,:])
2
(50, 5)
[[ 11.43 29.35 2.87 2329.68 2.87]
[ 12.07 23.32 4.41 1507.99 3.93]
[ 13.17 23.8 4.43 2108.47 3.82]
[ 5.75 41.89 1.67 189.13 0.22]
[ 12.88 42.19 0.83 728.47 4.56]]
dt = {'names':["sr","pop15","pop75","dpi","ddpi"],
'formats':[float, float, float, float, float]}
data.dtype=dt
print(data.ndim)
print(data.shape)
2
(50, 1)
data[:5]
array([[(11.43, 29.35, 2.87, 2329.68, 2.87)],
[(12.07, 23.32, 4.41, 1507.99, 3.93)],
[(13.17, 23.8 , 4.43, 2108.47, 3.82)],
[( 5.75, 41.89, 1.67, 189.13, 0.22)],
[(12.88, 42.19, 0.83, 728.47, 4.56)]],
dtype=[('sr', '<f8'), ('pop15', '<f8'), ('pop75', '<f8'), ('dpi', '<f8'), ('ddpi', '<f8')])
# sort the records in place along axis 0 (records are compared field by field in dtype order, so 'sr' is the primary key)
data.sort(axis=0)
data[:5]
array([[(0.6 , 39.74, 1.34, 662.86, 2.67)],
[(1.27, 34.03, 3.08, 1900.1 , 1.12)],
[(2.02, 41.18, 1.05, 220.56, 1.03)],
[(2.81, 46.12, 1.21, 249.87, 1.13)],
[(3.01, 46.05, 0.87, 289.71, 1.48)]],
dtype=[('sr', '<f8'), ('pop15', '<f8'), ('pop75', '<f8'), ('dpi', '<f8'), ('ddpi', '<f8')])
# descending sort: sort ascending in place, then read the result through a reversed view
data.sort(axis=0)
data[::-1][:5]
array([[(21.1 , 27.01, 1.91, 1257.28, 8.21)],
[(18.56, 45.25, 0.56, 138.33, 5.14)],
[(16.85, 24.42, 3.93, 2496.53, 3.99)],
[(15.48, 32.54, 2.47, 601.05, 8.12)],
[(14.65, 24.71, 3.25, 1740.7 , 7.66)]],
dtype=[('sr', '<f8'), ('pop15', '<f8'), ('pop75', '<f8'), ('dpi', '<f8'), ('ddpi', '<f8')])
# Let's re-import the data
data = np.genfromtxt(r'./Numpy_Datasets/Lifecyclesavings.csv', delimiter=',', skip_header=1)
print(data.ndim)
print(data.shape)
print(data[:5,:])
2
(50, 5)
[[ 11.43 29.35 2.87 2329.68 2.87]
[ 12.07 23.32 4.41 1507.99 3.93]
[ 13.17 23.8 4.43 2108.47 3.82]
[ 5.75 41.89 1.67 189.13 0.22]
[ 12.88 42.19 0.83 728.47 4.56]]
# argsort the first column to get the row order, then use those indices to display the first 5 rows of the sorted data
data[np.argsort(data[:,0])][:5]
array([[ 0.6 , 39.74, 1.34, 662.86, 2.67],
[ 1.27, 34.03, 3.08, 1900.1 , 1.12],
[ 2.02, 41.18, 1.05, 220.56, 1.03],
[ 2.81, 46.12, 1.21, 249.87, 1.13],
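[ 3.01, 46.05, 0.87, 289.71, 1.48]])
# reverse (descending) sort: argsort returns the indices for ascending order, so reverse them with [::-1] before indexing
np.argsort(data[:,0])
array([ 6, 18, 31, 41, 16, 11, 23, 30, 49, 8, 40, 3, 38, 29, 43, 17, 46,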
42, 5, 48, 19, 44, 47, 26, 24, 28, 15, 9, 35, 12, 20, 0, 37, 7,
1, 34, 14, 13, 32, 33, 4, 2, 36, 39, 21, 27, 25, 10, 45, 22],
dtype=int64)
data[np.argsort(data[:,0])[::-1]][:5]
array([[ 21.1 , 27.01, 1.91, 1257.28, 8.21],
[ 18.56, 45.25, 0.56, 138.33, 5.14],
[ 16.85, 24.42, 3.93, 2496.53, 3.99],
[ 15.48, 32.54, 2.47, 601.05, 8.12],
[ 14.65, 24.71, 3.25, 1740.7 , 7.66]])
### lexsort applies its keys from right to left: the last key (column 0 here) is the primary sort key and column 1 breaks ties
data = data.round()
data[np.lexsort((data[:,1].tolist(), data[:,0].tolist()))][:5]
array([[ 1., 34., 3., 1900., 1.],
[ 1., 40., 1., 663., 3.],
[ 2., 41., 1., 221., 1.],
[ 3., 46., 1., 290., 1.],
[ 3., 46., 1., 250., 1.]])
# re-import the data
data = np.genfromtxt(r'./Numpy_Datasets/Lifecyclesavings.csv', delimiter=',', skip_header=1)
data = data.round()
# sort by column index 2 in ascending order (primary key, given last), breaking ties by column index 4 in descending order (negated key)
data[np.lexsort([-data[:,4], data[:,2]])][:10]
array([[ 12., 45., 1., 290., 7.],
[ 4., 42., 1., 208., 6.],
[ 13., 42., 1., 728., 5.],
[ 19., 45., 1., 138., 5.],
[ 5., 47., 1., 243., 5.],
[ 4., 44., 1., 569., 4.],
[ 1., 40., 1., 663., 3.],
[ 5., 47., 1., 277., 3.],
[ 11., 48., 1., 471., 3.],
[ 8., 47., 1., 232., 3.]])