import numpy as np

print(np.nan == np.nan)  # False
print(np.nan != np.nan)  # True
print(0*np.nan) # nan
print(np.nan-np.nan) # nan
print(np.inf>np.nan) # False


import numpy as np

x = np.array([1, 1, 8, np.nan, 10])
print(x)
# [ 1.  1.  8. nan 10.]

y = np.isnan(x)
print(y)
# [False False False  True False]

z = np.count_nonzero(y)
print(z)  # 1


import numpy as np

a = np.dtype('b1')
print(a.type)  # <class 'numpy.bool_'>
print(a.itemsize)  # 1

a = np.dtype('i1')
print(a.type)  # <class 'numpy.int8'>
print(a.itemsize)  # 1
a = np.dtype('i2')
print(a.type)  # <class 'numpy.int16'>
print(a.itemsize)  # 2
a = np.dtype('i4')
print(a.type)  # <class 'numpy.int32'>
print(a.itemsize)  # 4
a = np.dtype('i8')
print(a.type)  # <class 'numpy.int64'>
print(a.itemsize)  # 8

a = np.dtype('u1')
print(a.type)  # <class 'numpy.uint8'>
print(a.itemsize)  # 1

a = np.dtype('f4')
print(a.type)  # <class 'numpy.float32'>
print(a.itemsize)  # 4

a = np.dtype('S')
print(a.type)  # <class 'numpy.bytes_'>
print(a.itemsize)  # 0
a = np.dtype('S3')
print(a.type)  # <class 'numpy.bytes_'>
print(a.itemsize)  # 3

a = np.dtype('U3')
print(a.type)  # <class 'numpy.str_'>
print(a.itemsize)  # 12


import numpy as np

ii16 = np.iinfo(np.int16)
print(ii16.min)  # -32768
print(ii16.max)  # 32767

ii32 = np.iinfo(np.int32)
print(ii32.min)  # -2147483648
print(ii32.max)  # 2147483647

ff16 = np.finfo(np.float16)
print(ff16.bits)  # 16
print(ff16.min)  # -65500.0
print(ff16.max)  # 65500.0
print(ff16.eps)  # 0.000977

ff32 = np.finfo(np.float32)
print(ff32.bits)  # 32
print(ff32.min)  # -3.4028235e+38
print(ff32.max)  # 3.4028235e+38
print(ff32.eps)  # 1.1920929e-07


import numpy as np

a = np.datetime64('2020-03-01')
print(a, a.dtype)  # 2020-03-01 datetime64[D]

a = np.datetime64('2020-03-08 20:00:05')
print(a, a.dtype)  # 2020-03-08T20:00:05 datetime64[s]

###################################################################
# 也可以强制指定
a = np.datetime64('2020-03', 'D')
print(a, a.dtype)  # 2020-03-01 datetime64[D]

a = np.datetime64('2020-03', 'Y')
print(a, a.dtype)  # 2020 datetime64[Y]

print(np.datetime64('2020-03') == np.datetime64('2020-03-01'))  # True
print(np.datetime64('2020-03') == np.datetime64('2020-03-02'))  #False

###################################################################
# 从字符串创建 datetime64 数组时，如果单位不统一，则一律转化成其中最小的单位。
a = np.array(['2020-03', '2020-03-08', '2020-03-08 20:00'], dtype='datetime64')
print(a, a.dtype)  # ['2020-03-01T00:00' '2020-03-08T00:00' '2020-03-08T20:00'] datetime64[m]

###################################################################
# 使用arange()创建 datetime64 数组，用于生成日期范围。
a = np.arange('2020-08-01', '2020-08-10',2, dtype=np.datetime64)
print(a)
# ['2020-08-01' '2020-08-03' '2020-08-05' '2020-08-07' '2020-08-09']
print(a.dtype)  # datetime64[D]

a = np.arange('2020-08-01 20:00', '2020-08-10', dtype=np.datetime64)
print(a)
# ['2020-08-01T20:00' '2020-08-01T20:01' '2020-08-01T20:02' ...
#  '2020-08-09T23:57' '2020-08-09T23:58' '2020-08-09T23:59']
print(a.dtype)  # datetime64[m]

a = np.arange('2020-05', '2020-12', dtype=np.datetime64)
print(a)
# ['2020-05' '2020-06' '2020-07' '2020-08' '2020-09' '2020-10' '2020-11']
print(a.dtype)  # datetime64[M]


import numpy as np

a = np.datetime64('2020-03-08') - np.datetime64('2020-03-07')
b = np.datetime64('2020-03-08') - np.datetime64('202-03-07 08:00')
c = np.datetime64('2020-03-08') - np.datetime64('2020-03-07 23:00', 'D')

print(a, a.dtype)  # 1 days timedelta64[D]
print(b, b.dtype)  # 956178240 minutes timedelta64[m]
print(c, c.dtype)  # 1 days timedelta64[D]

a = np.datetime64('2020-03') + np.timedelta64(20, 'D')
b = np.datetime64('2020-06-15 00:00') + np.timedelta64(12, 'h')
print(a, a.dtype)  # 2020-03-21 datetime64[D]
print(b, b.dtype)  # 2020-06-15T12:00 datetime64[m]

###################################################################
# 注意年（'Y'）和月（'M'）这两个单位无法和其它单位进行运算（一年有几天？一个月有几个小时？这些都是不确定的）
a = np.timedelta64(1, 'Y')
b = np.timedelta64(a, 'M')
print(a)  # 1 years
print(b)  # 12 months
print(np.timedelta64(a, 'D'))
# TypeError: Cannot cast NumPy timedelta64 scalar from metadata [Y] to [D] according to the rule 'same_kind

###################################################################
# 运算示例
a = np.timedelta64(1, 'Y')
b = np.timedelta64(6, 'M')
c = np.timedelta64(1, 'W')
d = np.timedelta64(1, 'D')
e = np.timedelta64(10, 'D')

print(a)  # 1 years
print(b)  # 6 months
print(a + b)  # 18 months
print(a - b)  # 6 months
print(2 * a)  # 2 years
print(a / b)  # 2.0
print(c / d)  # 7.0
print(c % e)  # 7 days


# numpy.datetime64 与 datetime.datetime 相互转换
import numpy as np,datetime

dt = datetime.datetime(year=2020, month=6, day=1, hour=20, minute=5, second=30)
print(dt) #2020-06-01 20:05:30
dt64 = np.datetime64(dt, 's')
print(dt64, dt64.dtype) # 2020-06-01T20:05:30 datetime64[s]

dt2 = dt64.astype(datetime.datetime)
print(dt2, type(dt2))
# 2020-06-01 20:05:30 <class 'datetime.datetime'>


# 将指定的偏移量应用于工作日，单位天（'D'）。计算下一个工作日，如果当前日期为非工作日，默认报错。
# 可以指定 forward 或 backward 规则来避免报错。（一个是向前取第一个有效的工作日，一个是向后取第一个有效的工作日）

import numpy as np

# 2020-07-10 星期五
a = np.busday_offset('2020-07-10', offsets=1)# 计算下一个工作日
print(a)  # 2020-07-13

a = np.busday_offset('2020-07-11', offsets=1)# 当前日期为非工作日，默认报错
print(a)
# ValueError: Non-business day date in busday_offset

a = np.busday_offset('2020-07-11', offsets=0, roll='forward')
b = np.busday_offset('2020-07-11', offsets=0, roll='backward')
print(a)  # 2020-07-13
print(b)  # 2020-07-10

a = np.busday_offset('2020-07-11', offsets=1, roll='forward')
b = np.busday_offset('2020-07-11', offsets=1, roll='backward')
print(a)  # 2020-07-14
print(b)  # 2020-07-13

###################################################################
# 返回指定日期是否是工作日
a = np.is_busday('2020-07-10')
b = np.is_busday('2020-07-11')
print(a)  # True
print(b)  # False

###################################################################
# 统计一个 datetime64[D] 数组中的工作日天数。
begindates = np.datetime64('2020-07-10')
enddates = np.datetime64('2020-07-20')
a = np.arange(begindates, enddates, dtype='datetime64')
b = np.count_nonzero(np.is_busday(a))
print(a)
# ['2020-07-10' '2020-07-11' '2020-07-12' '2020-07-13' '2020-07-14'
#  '2020-07-15' '2020-07-16' '2020-07-17' '2020-07-18' '2020-07-19']
print(b)  # 6


# 返回两个日期之间的工作日数量。
a = np.busday_count(begindates, enddates)
b = np.busday_count(enddates, begindates)
print(a)  # 6
print(b)  # -6


import numpy as np
# 创建一维数组
a = np.array([0, 1, 2, 3, 4])# 列表
b = np.array((0, 1, 2, 3, 4))# 元组
print(a, type(a))
# [0 1 2 3 4] <class 'numpy.ndarray'>
print(b, type(b))
# [0 1 2 3 4] <class 'numpy.ndarray'>

# 创建二维数组
c = np.array([[11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20],
              [21, 22, 23, 24, 25],
              [26, 27, 28, 29, 30],
              [31, 32, 33, 34, 35]])
print(c, type(c))
# [[11 12 13 14 15]
#  [16 17 18 19 20]
#  [21 22 23 24 25]
#  [26 27 28 29 30]
#  [31 32 33 34 35]] <class 'numpy.ndarray'>

# 创建三维数组
d = np.array([[(1.5, 2, 3), (4, 5, 6)],
              [(3, 2, 1), (4, 5, 6)]])
print(d, type(d))
# [[[1.5 2.  3. ]
#   [4.  5.  6. ]]
#
#  [[3.  2.  1. ]
#   [4.  5.  6. ]]] <class 'numpy.ndarray'>


import numpy as np

x = [[1, 1, 1], [1, 1, 1], [1, 1, 1]]
y = np.array(x)
z = np.asarray(x)
x[1][2] = 2
print(x,type(x))
# [[1, 1, 1], [1, 1, 2], [1, 1, 1]] <class 'list'>

print(z,type(z))# 由列表转换为一个独立的ndarray，不会随x改变
# [[1 1 1]
#  [1 1 1]
#  [1 1 1]] <class 'numpy.ndarray'>

x = np.array([[1, 1, 1], [1, 1, 1], [1, 1, 1]])
z = np.asarray(x)
print(z,type(z),z.dtype)# x本身为ndarray，dtype不变，会随x改变
# [[1 1 1]
#  [1 1 2]
#  [1 1 1]] <class 'numpy.ndarray'> int32


import numpy as np

def f(x, y):
    return 10 * x + y

x = np.fromfunction(f, (5, 4), dtype=int)
print(x)
# [[ 0  1  2  3]
#  [10 11 12 13]
#  [20 21 22 23]
#  [30 31 32 33]
#  [40 41 42 43]]
# 每个元素 (i, j) 的值是通过 f(i, j) 计算的，其中 i 是行索引，j 是列索引

x = np.fromfunction(lambda i, j: i == j, (3, 3), dtype=int)
print(x)
# [[ True False False]
#  [False  True False]
#  [False False  True]]


import numpy as np

x = np.zeros(5)
print(x)  # [0. 0. 0. 0. 0.]
x = np.zeros([2, 3])
print(x)
# [[0. 0. 0.]
#  [0. 0. 0.]]

x = np.array([[1, 2, 3], [4, 5, 6]])
y = np.zeros_like(x)
print(y)
# [[0 0 0]
#  [0 0 0]]


import numpy as np

x = np.arange(9).reshape((3, 3))
print(x)
# [[0 1 2]
#  [3 4 5]
#  [6 7 8]]
print(np.diag(x))  # [0 4 8]
print(np.diag(x, k=1))  # [1 5]
print(np.diag(x, k=-1))  # [3 7]

v = [1, 3, 5, 7]
x = np.diag(v)
print(x)
# [[1 0 0 0]
#  [0 3 0 0]
#  [0 0 5 0]
#  [0 0 0 7]]


import numpy as np

x = np.full(2, 7)
print(x)
# [7 7]

x = np.full((2, 7), True)
print(x)
# [[ True  True  True  True  True  True  True]
#   [ True  True  True  True  True  True  True]]

x = np.array([[1, 2, 3], [4, 5, 6]])
y = np.full_like(x, 7)
print(y)
# [[7 7 7]
#  [7 7 7]]


import numpy as np

x = np.arange(5)
print(x)  # [0 1 2 3 4]

x = np.arange(3, 7, 2)
print(x)  # [3 5]

x = np.linspace(start=0, stop=2, num=9)# num=9 指定要生成的样本数量
print(x)  
# [0.   0.25 0.5  0.75 1.   1.25 1.5  1.75 2.  ]

x = np.logspace(0, 1, 5)# 0表示 10^0  1表示 10^1  5 是要生成的样本数量
print(np.around(x, 2))
# [ 1.    1.78  3.16  5.62 10.  ]            
#np.around 返回四舍五入后的值，可指定精度。
# around(a, decimals=0, out=None)
# a 输入数组
# decimals 要舍入的小数位数。 默认值为0。 如果为负，整数将四舍五入到小数点左侧的位置

x = np.random.random(5)
print(x)
# [0.41768753 0.16315577 0.80167915 0.99690199 0.11812291]随机的

x = np.random.random([2, 3])
print(x)
#[[0.33150306 0.02528108 0.01043121]
# [0.19817739 0.19836231 0.13301785]]


# 利用字典来定义结构
import numpy as np

personType = np.dtype({
    'names': ['name', 'age', 'weight'],
    'formats': ['U30', 'i8', 'f8']})
# 'names' 指定字段的名称。
# 'formats' 指定每个字段的数据类型。
# 'U30' 表示一个长度最多为 30 的 Unicode 字符串。
# 'i8' 表示一个 64 位整数。
# 'f8' 表示一个 64 位浮点数。

a = np.array([('Liming', 24, 63.9), ('Mike', 15, 67.), ('Jan', 34, 45.8)],
             dtype=personType)
print(a, type(a))
# [('Liming', 24, 63.9) ('Mike', 15, 67. ) ('Jan', 34, 45.8)] <class 'numpy.ndarray'>


# 利用包含多个元组的列表来定义结构

import numpy as np

personType = np.dtype([('name', 'U30'), ('age', 'i8'), ('weight', 'f8')])
a = np.array([('Liming', 24, 63.9), ('Mike', 15, 67.), ('Jan', 34, 45.8)],dtype=personType)
print(a, type(a))
# [('Liming', 24, 63.9) ('Mike', 15, 67. ) ('Jan', 34, 45.8)]
# <class 'numpy.ndarray'>

# 结构数组的取值方式和一般数组差不多，可以通过下标取得元素：
print(a[0])
# ('Liming', 24, 63.9)

print(a[-2:])
# [('Mike', 15, 67. ) ('Jan', 34, 45.8)]

# 我们可以使用字段名作为下标获取对应的值
print(a['name'])
# ['Liming' 'Mike' 'Jan']
print(a['age'])
# [24 15 34]
print(a['weight'])
# [63.9 67.  45.8]


import numpy as np

a = np.array([1, 2, 3, 4, 5])
print(a.shape)  # (5,)
print(a.dtype)  # int32
print(a.size)  # 5
print(a.ndim)  # 1
print(a.itemsize)  # 4

b = np.array([[1, 2, 3], [4, 5, 6.0]])
print(b.shape)  # (2, 3)
print(b.dtype)  # float64
print(b.size)  # 6
print(b.ndim)  # 2
print(b.itemsize)  # 8


c = np.array([1, 2, 3, 4, '5'])
print(c)  # ['1' '2' '3' '4' '5']
d = np.array([1, 2, 3, 4, 5.0])
print(d)  # [1. 2. 3. 4. 5.]


import numpy as np

x = np.array([1, 2, 3, 4, 5, 6, 7, 8])
y = x   #相同内存地址
y[0] = -1
print(x)
# [-1  2  3  4  5  6  7  8]
print(y)
# [-1  2  3  4  5  6  7  8]

x = np.array([1, 2, 3, 4, 5, 6, 7, 8])
y = x.copy()
y[0] = -1
print(x)
# [1 2 3 4 5 6 7 8]
print(y)
# [-1  2  3  4  5  6  7  8]


# 索引
import numpy as np

x = np.array([1, 2, 3, 4, 5, 6, 7, 8])
print(x[2])  # 3

x = np.array([[11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20],
              [21, 22, 23, 24, 25],
              [26, 27, 28, 29, 30],
              [31, 32, 33, 34, 35]])
print(x[2])  # [21 22 23 24 25]
print(x[2][1])  # 22
print(x[2, 1])  # 22


# 切片[start:stop:step]前闭后开
import numpy as np
# 对一维数组的切片
x = np.array([1, 2, 3, 4, 5, 6, 7, 8])
print(x[0:2])  # [1 2]
print(x[1:5:2])  # [2 4]
print(x[2:])  # [3 4 5 6 7 8]
print(x[:2])  # [1 2]
print(x[-2:])  # [7 8]
print(x[:-2])  # [1 2 3 4 5 6]
print(x[:])  # [1 2 3 4 5 6 7 8]
print(x[::-1])  # [8 7 6 5 4 3 2 1]


# 对二维数组切片[行(start:stop:step)，列(start:stop:step)]
x = np.array([[11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20],
              [21, 22, 23, 24, 25],
              [26, 27, 28, 29, 30],
              [31, 32, 33, 34, 35]])
print(x[0:2])
# [[11 12 13 14 15]
#  [16 17 18 19 20]]

print(x[1:5:2])
# [[16 17 18 19 20]
#  [26 27 28 29 30]]

print(x[-2:])
# [[26 27 28 29 30]
#  [31 32 33 34 35]]

print(x[:-2])
# [[11 12 13 14 15]
#  [16 17 18 19 20]
#  [21 22 23 24 25]]

print(x[:])
# [[11 12 13 14 15]
#  [16 17 18 19 20]
#  [21 22 23 24 25]
#  [26 27 28 29 30]
#  [31 32 33 34 35]]

print(x[2, :])  # [21 22 23 24 25]  访问第 3 行的所有列元素
print(x[:, 2])  # [13 18 23 28 33]  访问所有行的第 3 列元素
print(x[0, 1:4])  # [12 13 14]      1 行的第 2 到第 4 列元素
print(x[1:4, 0])  # [16 21 26]
print(x[1:3, 2:4])
# [[18 19]
#  [23 24]]

print(x[:, :])
# [[11 12 13 14 15]
#  [16 17 18 19 20]
#  [21 22 23 24 25]
#  [26 27 28 29 30]
#  [31 32 33 34 35]]

print(x[::2, ::2])
# [[11 13 15]
#  [21 23 25]
#  [31 33 35]]

print(x[::-1, :])
# [[31 32 33 34 35]
#  [26 27 28 29 30]
#  [21 22 23 24 25]
#  [16 17 18 19 20]
#  [11 12 13 14 15]]

print(x[:, ::-1])
# [[15 14 13 12 11]
#  [20 19 18 17 16]
#  [25 24 23 22 21]
#  [30 29 28 27 26]
#  [35 34 33 32 31]]


import numpy as np

x = np.array([1, 2, 3, 4, 5, 6, 7, 8])
print(x[0, 1, 2])
# [1 2 3]
print(x[0, 1, -1])
# [1 2 8]

x = np.array([[11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20],
              [21, 22, 23, 24, 25],
              [26, 27, 28, 29, 30],
              [31, 32, 33, 34, 35]])
print(x[0, 1, 2])
# [[11 12 13 14 15]
#  [16 17 18 19 20]
#  [21 22 23 24 25]]
print(x[0, 1, -1])
# [[11 12 13 14 15]
#  [16 17 18 19 20]
#  [31 32 33 34 35]]

y = x[[0, 1, 2], [2, 3, 4]]#[0,2][1,3][2,4]
print(y)
# [13 19 25]

###################################################################

x = np.array([1, 2, 3, 4, 5, 6, 7, 8])
r = np.array([[0, 1], [3, 4]])
print(x[r])
# [[1 2]
#  [4 5]]

x = np.array([[11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20],
              [21, 22, 23, 24, 25],
              [26, 27, 28, 29, 30],
              [31, 32, 33, 34, 35]])

r = np.array([[0, 1], [3, 4]])
print(x[r])
# [[[11 12 13 14 15]
#   [16 17 18 19 20]]
#
#  [[26 27 28 29 30]
#   [31 32 33 34 35]]]

# 获取数组中的四个角的元素
r = np.array([[0, 0], [4, 4]])
c = np.array([[0, 4], [0, 4]])
y = x[r, c]
print(y)
# [[11 15]
#  [31 35]]

###################################################################
# 与普通索引结合
x = np.array([[11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20],
              [21, 22, 23, 24, 25],
              [26, 27, 28, 29, 30],
              [31, 32, 33, 34, 35]])

y = x[0:3, [1, 2, 2]]
print(y)
# [[12 13 13]
#  [17 18 18]
#  [22 23 23]]

###################################################################

# numpy. take(a, indices, axis=None, out=None, mode='raise') 沿指定轴从数组中提取元素

x = np.array([1, 2, 3, 4, 5, 6, 7, 8])
r = [0, 1, 2]
print(np.take(x, r))
# [1 2 3]

r = [0, 1, -1]
print(np.take(x, r))
# [1 2 8]

x = np.array([[11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20],
              [21, 22, 23, 24, 25],
              [26, 27, 28, 29, 30],
              [31, 32, 33, 34, 35]])

r = [0, 1, 2]
print(np.take(x, r, axis=0))
# [[11 12 13 14 15]
#  [16 17 18 19 20]
#  [21 22 23 24 25]]

r = [0, 1, -1]
print(np.take(x, r, axis=0))
# [[11 12 13 14 15]
#  [16 17 18 19 20]
#  [31 32 33 34 35]]
###################################################################
# 注意：使用切片时，生成的是视图，会影响原数组
#  但是数组索引，是副本，不影响原数组。 

# 切片
a=np.array([[1,2],
            [3,4],
            [5,6]])
b=a[0:1,0:1]#[[1]]
b[0,0]=2
print(a[0,0]==b)
#[[True]]

# 数组索引
b=a[0,0]
b=2
print(a[0,0]==b)
#False


import numpy as np

x = np.array([1, 2, 3, 4, 5, 6, 7, 8])
y = x > 5
print(y)
# [False False False False False  True  True  True]
print(x[x > 5])
# [6 7 8]

x = np.array([[11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20],
              [21, 22, 23, 24, 25],
              [26, 27, 28, 29, 30],
              [31, 32, 33, 34, 35]])
y = x > 25
print(y)
# [[False False False False False]
#  [False False False False False]
#  [False False False False False]
#  [ True  True  True  True  True]
#  [ True  True  True  True  True]]
print(x[x > 25])
# [26 27 28 29 30 31 32 33 34 35]

# 使用 np.isnan(x) 生成一个布尔数组，指示哪些元素是 NaN
# 然后使用 np.logical_not() 取反，得到一个布尔数组，指示哪些元素不是 NaN
x = np.array([np.nan, 1, 2, np.nan, 3, 4, 5])
y = np.logical_not(np.isnan(x))
# 使用布尔索引来过滤掉 NaN 值
print(x[y])
# [1. 2. 3. 4. 5.]


import numpy as np
import matplotlib.pyplot as plt
# x 是一个从 0 到 2π 的等间距数组，包含 50 个点。
# y 是 x 中每个点对应的正弦值
x = np.linspace(0, 2 * np.pi, 50)
y = np.sin(x)
print(len(x))  # 50
plt.plot(x, y)
# 布尔掩码 mask:
# y >= 0 生成一个布尔数组，表示 y 中每个值是否大于或等于 0。
# mask 是一个布尔数组，与 x 和 y 的长度相同
mask = y >= 0
print(len(x[mask]))  # 25
print(mask)
'''
[ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True False False False False False False False False False False False
 False False False False False False False False False False False False
 False False]
'''
# 绘制筛选出的点，使用蓝色圆圈标记 ('bo')。
plt.plot(x[mask], y[mask], 'bo')
# 复合条件 mask:
# np.logical_and(y >= 0, x <= np.pi / 2) 生成一个布尔数组，表示 y 大于等于 0 且 x 小于等于 π/2 的点。
# 打印 mask，可以看到满足条件的点为 [0, π/2] 区间内的正弦正半周期点
mask = np.logical_and(y >= 0, x <= np.pi / 2)
print(mask)
'''
[ True  True  True  True  True  True  True  True  True  True  True  True
  True False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False]
'''
# 绘制满足复合条件的点，使用绿色圆圈标记 ('go')
plt.plot(x[mask], y[mask], 'go')
plt.show()


import numpy as np

x = np.array([[11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20],
              [21, 22, 23, 24, 25],
              [26, 27, 28, 29, 30],
              [31, 32, 33, 34, 35]])
# 对每一列求和
y = np.apply_along_axis(np.sum, 0, x)
print(y)  # [105 110 115 120 125]
y = np.apply_along_axis(np.sum, 1, x)
print(y)  # [ 65  90 115 140 165]

y = np.apply_along_axis(np.mean, 0, x)
print(y)  # [21. 22. 23. 24. 25.]
y = np.apply_along_axis(np.mean, 1, x)
print(y)  # [13. 18. 23. 28. 33.]


def my_func(x):
    return (x[0] + x[-1]) * 0.5

# 对每一列的第一和最后一个求均值
y = np.apply_along_axis(my_func, 0, x)
print(y)  # [21. 22. 23. 24. 25.]
y = np.apply_along_axis(my_func, 1, x)
print(y)  # [13. 18. 23. 28. 33.]


x = np.array([1, 2, 9, 4, 5, 6, 7, 8])
print(x.shape)  # (8,)
x.shape = [2, 4]
print(x)
# [[1 2 9 4]
#  [5 6 7 8]]


# .flat会影响原数组

x = np.array([[11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20],
              [21, 22, 23, 24, 25],
              [26, 27, 28, 29, 30],
              [31, 32, 33, 34, 35]])
y = x.flat
print(y)
# <numpy.flatiter object at 0x0000020F9BA10C60> 表示 y 是一个 numpy.flatiter 对象，并给出了它的内存地址
for i in y:
    print(i, end=' ')
# 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35

y[3] = 0
print(x)
# [[11 12 13  0 15]
#  [16 17 18 19 20]
#  [21 22 23 24 25]
#  [26 27 28 29 30]
#  [31 32 33 34 35]]


# flatten()函数返回的是副本，不影响原数组

x = np.array([[11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20],
              [21, 22, 23, 24, 25],
              [26, 27, 28, 29, 30],
              [31, 32, 33, 34, 35]])
y = x.flatten()# 默认为order='C'
print(y)
# [11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35]

y[3] = 0
print(x)
# [[11 12 13 14 15]
#  [16 17 18 19 20]
#  [21 22 23 24 25]
#  [26 27 28 29 30]
#  [31 32 33 34 35]]


# ravel()返回的是视图,影响原数组

x = np.array([[11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20],
              [21, 22, 23, 24, 25],
              [26, 27, 28, 29, 30],
              [31, 32, 33, 34, 35]])
y = np.ravel(x)
print(y)
# [11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35]

y[3] = 0
print(x)
# [[11 12 13  0 15]
#  [16 17 18 19 20]
#  [21 22 23 24 25]
#  [26 27 28 29 30]
#  [31 32 33 34 35]]

# 【例】order=F 就是拷贝

x = np.array([[11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20],
              [21, 22, 23, 24, 25],
              [26, 27, 28, 29, 30],
              [31, 32, 33, 34, 35]])

y = np.ravel(x, order='F')
print(y)
# [11 16 21 26 31 12 17 22 27 32 13 18 23 28 33 14 19 24 29 34 15 20 25 30
#  35]

y[3] = 0
print(x)
# [[11 12 13 14 15]
#  [16 17 18 19 20]
#  [21 22 23 24 25]
#  [26 27 28 29 30]
#  [31 32 33 34 35]]


# reshape()函数当参数newshape = [rows,-1]时，将根据行数自动确定列数,rows=-1时同理
# 修改会影响原数组
# 当参数newshape = -1时，表示将数组降为一维

x = np.arange(12)
y = np.reshape(x, [3, 4])
print(y)
# [[ 0  1  2  3]
#  [ 4  5  6  7]
#  [ 8  9 10 11]]

y = np.reshape(x,[-1,3])
print(y)
# [[ 0  1  2]
#  [ 3  4  5]
#  [ 6  7  8]
#  [ 9 10 11]]

y = np.reshape(x, -1)
print(y)
# [ 0  1  2  3  4  5  6  7  8  9 10 11]

y[1] = 10
print(x)
# [ 0 10  2  3  4  5  6  7  8  9 10 11]


import numpy as np

x = np.random.rand(5, 5) * 10   # 创建一个 5x5 的二维数组（矩阵），其中的每个元素都是从 0 到 1*10 之间的随机浮点数
x = np.around(x, 2)   # 对数组 x 中的每个元素进行四舍五入，保留两位小数
print(x)
# [[6.74 8.46 6.74 5.45 1.25]
#  [3.54 3.49 8.62 1.94 9.92]
#  [5.03 7.22 1.6  8.7  0.43]
#  [7.5  7.31 5.69 9.67 7.65]
#  [1.8  9.52 2.78 5.87 4.14]]
y = x.T
print(y)
# [[6.74 3.54 5.03 7.5  1.8 ]
#  [8.46 3.49 7.22 7.31 9.52]
#  [6.74 8.62 1.6  5.69 2.78]
#  [5.45 1.94 8.7  9.67 5.87]
#  [1.25 9.92 0.43 7.65 4.14]]
y = np.transpose(x)
print(y)
# [[6.74 3.54 5.03 7.5  1.8 ]
#  [8.46 3.49 7.22 7.31 9.52]
#  [6.74 8.62 1.6  5.69 2.78]
#  [5.45 1.94 8.7  9.67 5.87]
#  [1.25 9.92 0.43 7.65 4.14]]


import numpy as np
x = np.array([1, 2, 9, 4, 5, 6, 7, 8])
print(x.shape)  # (8,)
print(x)  # [1 2 9 4 5 6 7 8]

y = x[np.newaxis, :]
print(y.shape)  # (1, 8)
print(y)  # [[1 2 9 4 5 6 7 8]]

y = x[:, np.newaxis]
print(y.shape)  # (8, 1)
print(y)
# [[1]
#  [2]
#  [9]
#  [4]
#  [5]
#  [6]
#  [7]
#  [8]]


# 在机器学习和深度学习中，通常算法的结果是可以表示向量的数组（即包含两对或以上的方括号形式[[]]），
# 如果直接利用这个数组进行画图可能显示界面为空
# 我们可以利用squeeze()函数将表示向量的数组转换为秩为1的数组，这样利用 matplotlib 库函数画图时，就可以正常的显示结果了

import numpy as np

x = np.arange(10)
print(x.shape)  # (10,)
x = x[np.newaxis, :]
print(x.shape)  # (1, 10)
y = np.squeeze(x)
print(y.shape)  # (10,)

##################################################################
x = np.array([[[0], [1], [2]]])
print(x.shape)  # (1, 3, 1)
print(x)
# [[[0]
#   [1]
#   [2]]]

y = np.squeeze(x)
print(y.shape)  # (3,)
print(y)  # [0 1 2]

y = np.squeeze(x, axis=0)
print(y.shape)  # (3, 1)
print(y)
# [[0]
#  [1]
#  [2]]

y = np.squeeze(x, axis=2)
print(y.shape)  # (1, 3)
print(y)  # [[0 1 2]]

y = np.squeeze(x, axis=1)
# ValueError: cannot select an axis to squeeze out which has size not equal to one

##################################################################
# 画图示例
import matplotlib.pyplot as plt

x = np.array([[1, 4, 9, 16, 25]])
print(x.shape)  # (1, 5)
plt.plot(x)
plt.show()

##################################################################

x = np.array([[1, 4, 9, 16, 25]])
x = np.squeeze(x)
print(x.shape)  # (5, )
plt.plot(x)
plt.show()


# 【例】连接沿现有轴的数组序列
# 原来x，y都是一维的，拼接后也是一维的

x = np.array([1, 2, 3])
y = np.array([7, 8, 9])
z = np.concatenate([x, y])
print(z)
# [1 2 3 7 8 9]

z = np.concatenate([x, y], axis=0)# 按行
print(z)
# [1 2 3 7 8 9]

# 【例】原来x，y都是二维的，拼接后也是二维的

x = np.array([1, 2, 3]).reshape(1, 3)
y = np.array([7, 8, 9]).reshape(1, 3)
z = np.concatenate([x, y])
print(z)
# [[ 1  2  3]
#  [ 7  8  9]]
z = np.concatenate([x, y], axis=1)
print(z)
# [[ 1  2  3  7  8  9]]

# 【例】x，y在原来的维度上进行拼接

x = np.array([[1, 2, 3], [4, 5, 6]])
y = np.array([[7, 8, 9], [10, 11, 12]])
z = np.concatenate([x, y])
print(z)
# [[ 1  2  3]
#  [ 4  5  6]
#  [ 7  8  9]
#  [10 11 12]]
z = np.concatenate([x, y], axis=1)
print(z)
# [[ 1  2  3  7  8  9]
#  [ 4  5  6 10 11 12]]


# 沿着新的轴加入一系列数组（stack为增加维度的拼接）

x = np.array([1, 2, 3])
y = np.array([7, 8, 9])
z = np.stack([x, y])
print(z.shape)  # (2, 3)
print(z)
# [[1 2 3]
#  [7 8 9]]

z = np.stack([x, y], axis=1)
print(z.shape)  # (3, 2)
print(z)
# [[1 7]
#  [2 8]
#  [3 9]]

##################################################################

x = np.array([1, 2, 3]).reshape(1, 3)
y = np.array([7, 8, 9]).reshape(1, 3)
z = np.stack([x, y])
print(z.shape)  # (2, 1, 3)
print(z)
# [[[1 2 3]]
#
#  [[7 8 9]]]

z = np.stack([x, y], axis=1)
print(z.shape)  # (1, 2, 3)
print(z)
# [[[1 2 3]
#   [7 8 9]]]

z = np.stack([x, y], axis=2)
print(z.shape)  # (1, 3, 2)
print(z)
# [[[1 7]
#   [2 8]
#   [3 9]]]

##################################################################

x = np.array([[1, 2, 3], [4, 5, 6]])
y = np.array([[7, 8, 9], [10, 11, 12]])
z = np.stack([x, y])
print(z.shape)  # (2, 2, 3)
print(z)
# [[[ 1  2  3]
#   [ 4  5  6]]
# 
#  [[ 7  8  9]
#   [10 11 12]]]

z = np.stack([x, y], axis=1)
print(z.shape)  # (2, 2, 3)
print(z)
# [[[ 1  2  3]
#   [ 7  8  9]]
# 
#  [[ 4  5  6]
#   [10 11 12]]]

z = np.stack([x, y], axis=2)
print(z.shape)  # (2, 3, 2)
print(z)
# [[[ 1  7]
#   [ 2  8]
#   [ 3  9]]
# 
#  [[ 4 10]
#   [ 5 11]
#   [ 6 12]]]


# 【例】一维的情况
import numpy as np

x = np.array([1, 2, 3])
y = np.array([7, 8, 9])
z = np.vstack((x, y))
print(z.shape)  # (2, 3)
print(z)
# [[1 2 3]
#  [7 8 9]]

z = np.stack([x, y])
print(z.shape)  # (2, 3)
print(z)
# [[1 2 3]
#  [7 8 9]]

z = np.hstack((x, y))
print(z.shape)  # (6,)
print(z)
# [1  2  3  7  8  9]

z = np.concatenate((x, y))
print(z.shape)  # (6,)
print(z)  # [1 2 3 7 8 9]

# 【例】二维的情况

x = np.array([1, 2, 3]).reshape(1, 3)
y = np.array([7, 8, 9]).reshape(1, 3)
z = np.vstack((x, y))
print(z.shape)  # (2, 3)
print(z)
# [[1 2 3]
#  [7 8 9]]

z = np.concatenate((x, y), axis=0)
print(z.shape)  # (2, 3)
print(z)
# [[1 2 3]
#  [7 8 9]]

z = np.hstack((x, y))
print(z.shape)  # (1, 6)
print(z)
# [[ 1  2  3  7  8  9]]

z = np.concatenate((x, y), axis=1)
print(z.shape)  # (1, 6)
print(z)
# [[1 2 3 7 8 9]]

# 【例】二维的情况

x = np.array([[1, 2, 3], [4, 5, 6]])
y = np.array([[7, 8, 9], [10, 11, 12]])
z = np.vstack((x, y))
print(z.shape)  # (4, 3)
print(z)
# [[ 1  2  3]
#  [ 4  5  6]
#  [ 7  8  9]
#  [10 11 12]]

z = np.concatenate((x, y), axis=0)
print(z.shape)  # (4, 3)
print(z)
# [[ 1  2  3]
#  [ 4  5  6]
#  [ 7  8  9]
#  [10 11 12]]

z = np.hstack((x, y))
print(z.shape)  # (2, 6)
print(z)
# [[ 1  2  3  7  8  9]
#  [ 4  5  6 10 11 12]]

z = np.concatenate((x, y), axis=1)
print(z.shape)  # (2, 6)
print(z)
# [[ 1  2  3  7  8  9]
#  [ 4  5  6 10 11 12]]


import numpy as np

x = np.array([[11, 12, 13, 14],
              [16, 17, 18, 19],
              [21, 22, 23, 24]])
y = np.split(x, [1, 3])
print(y)
# [array([[11, 12, 13, 14]]), array([[16, 17, 18, 19],
#        [21, 22, 23, 24]]), array([], shape=(0, 4), dtype=int32)]

y = np.split(x, [1, 3], axis=1)# axis=1按列
print(y)
# [array([[11],
#        [16],
#        [21]]), array([[12, 13],
#        [17, 18],
#        [22, 23]]), array([[14],
#        [19],
#        [24]])]


import numpy as np

x = np.array([[11, 12, 13, 14],
              [16, 17, 18, 19],
              [21, 22, 23, 24]])
y = np.vsplit(x, 3)
print(y)
# [array([[11, 12, 13, 14]]), array([[16, 17, 18, 19]]), array([[21, 22, 23, 24]])]

y = np.split(x, 3)
print(y)
# [array([[11, 12, 13, 14]]), array([[16, 17, 18, 19]]), array([[21, 22, 23, 24]])]

y=np.hsplit(x,4)
print(y)
# [array([[11],
#        [16],
#        [21]]), array([[12],
#        [17],
#        [22]]), array([[13],
#        [18],
#        [23]]), array([[14],
#        [19],
#        [24]])]


import numpy as np

x = np.array([[1, 2], [3, 4]])
print(x)
# [[1 2]
#  [3 4]]

y = np.tile(x, (1, 3))
print(y)
# [[1 2 1 2 1 2]
#  [3 4 3 4 3 4]]

y = np.tile(x, (3, 1))
print(y)
# [[1 2]
#  [3 4]
#  [1 2]
#  [3 4]
#  [1 2]
#  [3 4]]

y = np.tile(x, (3, 3))
print(y)
# [[1 2 1 2 1 2]
#  [3 4 3 4 3 4]
#  [1 2 1 2 1 2]
#  [3 4 3 4 3 4]
#  [1 2 1 2 1 2]
#  [3 4 3 4 3 4]]


x = np.repeat(3, 4)
print(x)  # [3 3 3 3]

x = np.array([[1, 2], [3, 4]])
y = np.repeat(x, 2)
print(y)
# [1 1 2 2 3 3 4 4]

y = np.repeat(x, 2, axis=0)
print(y)
# [[1 2]
#  [1 2]
#  [3 4]
#  [3 4]]

y = np.repeat(x, 2, axis=1)
print(y)
# [[1 1 2 2]
#  [3 3 4 4]]

y = np.repeat(x, [2, 3], axis=0)# [2, 3] 表示在第一个轴（行）上，第一行重复 2 次，第二行重复 3 次
print(y)
# [[1 2]
#  [1 2]
#  [3 4]
#  [3 4]
#  [3 4]]

y = np.repeat(x, [2, 3], axis=1)
print(y)
# [[1 1 2 2 2]
#  [3 3 4 4 4]]


A = np.array([1,2,3])
B = np.array([1,2,3])

equal = np.allclose(A,B)
print(equal)
# True
equal = np.array_equal(A,B)
print(equal)
# True


# 查找只出现一次的唯一值
a=np.array([1,1,2,3,3,4,4])
b=np.unique(a,return_counts=True)
print(b)
# (array([1, 2, 3, 4]), array([2, 1, 2, 2]))
print(b[0][list(b[1]).index(1)])
# 2


a = np.array([0, 4, 5])
b = np.copy(a)
print(np.all(a == b))  # True
print(np.any(a == b))  # True

b[0] = 1
print(np.all(a == b))  # False
print(np.any(a == b))  # True

print(np.all([1.0, np.nan]))  # True
print(np.any([1.0, np.nan]))  # True


print(np.logical_not(3))  
# False
print(np.logical_not([True, False, 0, 1]))
# [False  True  True False]

print(np.logical_or(True, False))
# True
print(np.logical_and([True, False], [True, False]))# 对两个数组的对应元素执行逻辑操作。它逐元素比较两个数组，并返回一个新的布尔数组
# [ True False]
print(np.logical_xor([True, True, False, False], [True, False, True, False]))
# [False  True  True False]

x = np.arange(5)
print(np.logical_and(x > 1, x < 4))
# [False False  True  True False]


x = np.array([1, 2, 3, 4, 5, 6, 7, 8])

y = x > 2
print(y)
print(np.greater(x, 2))
# [False False  True  True  True  True  True  True]

x = np.array([[11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20],
              [21, 22, 23, 24, 25],
              [26, 27, 28, 29, 30],
              [31, 32, 33, 34, 35]])
y = x > 20
print(y)
print(np.greater(x, 20))
# [[False False False False False]
#  [False False False False False]
#  [ True  True  True  True  True]
#  [ True  True  True  True  True]
#  [ True  True  True  True  True]]

y = np.array([[32,28,31,33,37]
              [23,37,37,30,29]
              [32,24,10,33,15]
              [27,17,10,36,16]
              [25,32,23,39,34]])
z = x > y
print(z)
print(np.greater(x, y))
# [[False False False False False]
#  [False False False False False]
#  [False False  True False  True]
#  [False  True  True False  True]
#  [ True False  True False  True]]


#注意 numpy 的广播规则
x = np.array([[11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20],
              [21, 22, 23, 24, 25],
              [26, 27, 28, 29, 30],
              [31, 32, 33, 34, 35]])
y=np.array([32,37,30,24,10])
# y 的形状为 (5,)，会被视为 (1, 5)，然后进一步扩展为 (5, 5)，以便与 x 的形状匹配
# y 被广播为：
# [[32, 37, 30, 24, 10],
#  [32, 37, 30, 24, 10],
#  [32, 37, 30, 24, 10],
#  [32, 37, 30, 24, 10],
#  [32, 37, 30, 24, 10]]
# 然后，x 和广播后的 y 进行逐元素比较
z = x > y
print(z)
print(np.greater(x, y))
# [[False False False False  True]
#  [False False False False  True]
#  [False False False False  True]
#  [False False False  True  True]
#  [False False  True  True  True]]


# 比较两个数组是否可以认为相等
import numpy as np

x = np.isclose([1e10, 1e-7], [1.00001e10, 1e-8])
print(x)  # [ True False]

x = np.allclose([1e10, 1e-7], [1.00001e10, 1e-8])
print(x)  # False

x = np.isclose([1e10, 1e-8], [1.00001e10, 1e-9])
print(x)  # [ True  True]

x = np.allclose([1e10, 1e-8], [1.00001e10, 1e-9])
print(x)  # True

x = np.isclose([1.0, np.nan], [1.0, np.nan])
print(x)  # [ True False]

x = np.allclose([1.0, np.nan], [1.0, np.nan])
print(x)  # False

x = np.isclose([1.0, np.nan], [1.0, np.nan], equal_nan=True)
print(x)  # [ True  True]

x = np.allclose([1.0, np.nan], [1.0, np.nan], equal_nan=True)
print(x)  # True


# 获取a和b元素匹配的位置
a = np.array([1, 2, 3, 2, 3, 4, 3, 4, 5, 6])
b = np.array([7, 2, 10, 2, 7, 4, 9, 4, 9, 8])
mask=np.equal(a,b) 
print(np.where(mask)) # np.where(mask) 返回一个元组，其中包含 mask 中值为 True 的索引

# 获取5到10 之间的所有元素
a = np.array([2, 6, 1, 9, 10, 3, 27])
mask=np.logical_and(np.less_equal(a,10),np.greater_equal(a,5))
print(np.where(mask)) #对应的索引
x = np.where(mask)
print(a[x])  # [ 6  9 10]

(array([1, 3, 5, 7]),)
(array([1, 3, 4]),)
[ 6  9 10]


import numpy as np

x = np.arange(4).reshape(4, 1)
y = np.ones(5)

print(x.shape)  # (4, 1)
# [[0.]
#  [1.]
#  [2.]
#  [3.]]
print(y.shape)  # (5,)
# [1. 1. 1. 1. 1.]

print((x + y).shape)  # (4, 5)
# x:
# [[0. 0. 0. 0. 0.]
#  [1. 1. 1. 1. 1.]
#  [2. 2. 2. 2. 2.]
#  [3. 3. 3. 3. 3.]]
# y:
# [[1. 1. 1. 1. 1.]
#  [1. 1. 1. 1. 1.]
#  [1. 1. 1. 1. 1.]
#  [1. 1. 1. 1. 1.]]

print(x + y)
# [[1. 1. 1. 1. 1.]
#  [2. 2. 2. 2. 2.]
#  [3. 3. 3. 3. 3.]
#  [4. 4. 4. 4. 4.]]

# 不匹配报错的例子

x = np.arange(4)
y = np.ones(5)

print(x.shape)  # (4,)
print(y.shape)  # (5,)

print(x + y)
# ValueError: operands could not be broadcast together with shapes (4,) (5,)


import numpy as np
# 一维数组
x = np.array([1, 2, 3, 4, 5, 6, 7, 8])
y = x / 2
print(y)
print(np.divide(x, 2))
# [0.5 1.  1.5 2.  2.5 3.  3.5 4. ]

y = x ** 2
print(y)
print(np.power(x, 2))
# [ 1  4  9 16 25 36 49 64]

#二维数组
x = np.array([[11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20],
              [21, 22, 23, 24, 25],
              [26, 27, 28, 29, 30],
              [31, 32, 33, 34, 35]])
y = x // 2
print(y)
print(np.floor_divide(x, 2))
# [[ 5  6  6  7  7]
#  [ 8  8  9  9 10]
#  [10 11 11 12 12]
#  [13 13 14 14 15]
#  [15 16 16 17 17]]

#一维与二维
y = np.arange(1, 6)
print(y)
# [1 2 3 4 5]

z = x + y
print(z)
print(np.add(x, y))
# [[12 14 16 18 20]
#  [17 19 21 23 25]
#  [22 24 26 28 30]
#  [27 29 31 33 35]
#  [32 34 36 38 40]]

#二维与二维
y = np.arange(1, 26).reshape([5, 5])
print(y)
# [[ 1  2  3  4  5]
#  [ 6  7  8  9 10]
#  [11 12 13 14 15]
#  [16 17 18 19 20]
#  [21 22 23 24 25]]

z = x - y
print(z)
print(np.subtract(x, y))
# [[10 10 10 10 10]
#  [10 10 10 10 10]
#  [10 10 10 10 10]
#  [10 10 10 10 10]
#  [10 10 10 10 10]]


import numpy as np

x = np.arange(1, 5)
print(x)  # [1 2 3 4]

y = np.sqrt(x)
print(y)
# [1.         1.41421356 1.73205081 2.        ]
print(np.power(x, 0.5))
# [1.         1.41421356 1.73205081 2.        ]

y = np.square(x)
print(y)
# [ 1  4  9 16]
print(np.power(x, 2))
# [ 1  4  9 16]


import numpy as np

x = np.linspace(start=0, stop=np.pi*2, num=5)
print(x)
# [0.         1.57079633 3.14159265 4.71238898 6.28318531] 即0,pi/2,pi,2pi/3,2*pi

y = np.sin(x)
print(y)
# [ 0.0000000e+00  1.0000000e+00  1.2246468e-16 -1.0000000e+00
#  -2.4492936e-16]


import numpy as np

x = np.arange(1, 5)
print(x)
# [1 2 3 4]
y = np.exp(x)
print(y)
# [ 2.71828183  7.3890561  20.08553692 54.59815003]
z = np.log(y)
print(z)
# [1. 2. 3. 4.]


import numpy as np

x = np.array([[11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20],
              [21, 22, 23, 24, 25],
              [26, 27, 28, 29, 30],
              [31, 32, 33, 34, 35]])
y = np.sum(x)
print(y)  # 575

y = np.sum(x, axis=0)
print(y)  # [105 110 115 120 125]

y = np.sum(x, axis=1)
print(y)  # [ 65  90 115 140 165]

###############################################################
y = np.cumsum(x)
print(y)
# [ 11  23  36  50  65  81  98 116 135 155 176 198 221 245 270 296 323 351
#  380 410 441 473 506 540 575] 从数组的第一个元素开始，依次累加每个元素的值，生成一个新的数组

y = np.cumsum(x, axis=0)
print(y)
# [[ 11  12  13  14  15]
#  [ 27  29  31  33  35]
#  [ 48  51  54  57  60]
#  [ 74  78  82  86  90]
#  [105 110 115 120 125]] 以列累加，生成一个新的数组

y = np.cumsum(x, axis=1)
print(y)
# [[ 11  23  36  50  65]
#  [ 16  33  51  70  90]
#  [ 21  43  66  90 115]
#  [ 26  53  81 110 140]
#  [ 31  63  96 130 165]] 以行累加，生成一个新的数组


import numpy as np

x = np.array([[11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20],
              [21, 22, 23, 24, 25],
              [26, 27, 28, 29, 30],
              [31, 32, 33, 34, 35]])
y = np.prod(x)
print(y)  # 788529152

y = np.prod(x, axis=0)
print(y)
# [2978976 3877632 4972968 6294624 7875000]

y = np.prod(x, axis=1)
print(y)
# [  360360  1860480  6375600 17100720 38955840]

###############################################################
y = np.cumprod(x)
print(y)
# [         11         132        1716       24024      360360     5765760
#     98017920  1764322560  -837609728   427674624   391232512    17180672
#    395155456   893796352   870072320  1147043840   905412608  -418250752
#    755630080  1194065920 -1638662144  -897581056   444596224 -2063597568
#    788529152]

y = np.cumprod(x, axis=0)
print(y)
# [[     11      12      13      14      15]
#  [    176     204     234     266     300]
#  [   3696    4488    5382    6384    7500]
#  [  96096  121176  150696  185136  225000]
#  [2978976 3877632 4972968 6294624 7875000]]

y = np.cumprod(x, axis=1)
print(y)
# [[      11      132     1716    24024   360360]
#  [      16      272     4896    93024  1860480]
#  [      21      462    10626   255024  6375600]
#  [      26      702    19656   570024 17100720]
#  [      31      992    32736  1113024 38955840]]


import numpy as np

A = np.arange(2, 14).reshape((3, 4))
A[1, 1] = 8
print(A)
# [[ 2  3  4  5]
#  [ 6  8  8  9]
#  [10 11 12 13]]
print(np.diff(A))
# [[1 1 1]
#  [2 0 1]
#  [1 1 1]]
print(np.diff(A, axis=0))
# [[4 5 4 4]
#  [4 3 4 4]]


# 在一维数组中找到所有的局部极大值（或峰值）
import numpy as np

a = np.array([1, 3, 7, 1, 2, 6, 0, 1])
b1 = np.diff(a)
# [ 2  4 -6  1  4 -6  1]
b2 = np.sign(b1)
# [ 1  1 -1  1  1 -1  1]
b3 = np.diff(b2)
# [ 0 -2  2  0 -2  2] 

index = np.where(np.equal(b3, -2))[0] + 1

print(index) # [2 5]


import numpy as np

x = np.random.rand(3, 3) * 10
print(x)
# [[6.59144457 3.78566113 8.15321227]
#  [1.68241475 3.78753332 7.68886328]
#  [2.84255822 9.58106727 7.86678037]]

y = np.around(x)
print(y)
# [[ 7.  4.  8.]
#  [ 2.  4.  8.]
#  [ 3. 10.  8.]]

y = np.around(x, decimals=2)
print(y)
# [[6.59 3.79 8.15]
#  [1.68 3.79 7.69]
#  [2.84 9.58 7.87]]


import numpy as np

x = np.random.rand(3, 3) * 10
print(x)
# [[0.67847795 1.33073923 4.53920122]
#  [7.55724676 5.88854047 2.65502046]
#  [8.67640444 8.80110812 5.97528726]]

y = np.ceil(x)
print(y)
# [[1. 2. 5.]
#  [8. 6. 3.]
#  [9. 9. 6.]]

y = np.floor(x)
print(y)
# [[0. 1. 4.]
#  [7. 5. 2.]
#  [8. 8. 5.]]


import numpy as np

x = np.array([[11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20],
              [21, 22, 23, 24, 25],
              [26, 27, 28, 29, 30],
              [31, 32, 33, 34, 35]])
y = np.clip(x, a_min=20, a_max=30)
print(y)
# [[20 20 20 20 20]
#  [20 20 20 20 20]
#  [21 22 23 24 25]
#  [26 27 28 29 30]
#  [30 30 30 30 30]]


import numpy as np

x = np.arange(-5, 5)
print(x)
# [-5 -4 -3 -2 -1  0  1  2  3  4]

y = np.abs(x)
print(y)
# [5 4 3 2 1 0 1 2 3 4]

y = np.absolute(x)
print(y)
# [5 4 3 2 1 0 1 2 3 4]


x = np.arange(-5, 5)
print(x)
#[-5 -4 -3 -2 -1  0  1  2  3  4]
print(np.sign(x))
#[-1 -1 -1 -1 -1  0  1  1  1  1]


import numpy as np
arr1 = np.random.randint(1,10,10)
arr2 = np.random.randint(1,10,10)

print(f"arr1的平均数为:{np.mean(arr1)}")
print(f"arr1的中位数为:{np.median(arr1)}" )
print(f"arr1的方差为:{np.var(arr1)}" )
print(f"arr1的标准差为:{np.std(arr1)}" )
print(f"arr1,arr的相关性矩阵为:{np.cov(arr1,arr2)}" )
print(f"arr1,arr的协方差矩阵为:{np.corrcoef(arr1,arr2)}" )


import numpy as np

np.random.seed(20200612) # 设置随机数生成器的种子，保证每次运行代码时生成的随机数是相同的
x = np.random.rand(5, 5) * 10
x = np.around(x, 2)
print(x)
# [[2.32 7.54 9.78 1.73 6.22]
#  [6.93 5.17 9.28 9.76 8.25]
#  [0.01 4.23 0.19 1.73 9.27]
#  [7.99 4.97 0.88 7.32 4.29]
#  [9.05 0.07 8.95 7.9  6.99]]

y = np.sort(x)
print(y)
# [[1.73 2.32 6.22 7.54 9.78]
#  [5.17 6.93 8.25 9.28 9.76]
#  [0.01 0.19 1.73 4.23 9.27]
#  [0.88 4.29 4.97 7.32 7.99]
#  [0.07 6.99 7.9  8.95 9.05]]

y = np.sort(x, axis=0)
print(y)
# [[0.01 0.07 0.19 1.73 4.29]
#  [2.32 4.23 0.88 1.73 6.22]
#  [6.93 4.97 8.95 7.32 6.99]
#  [7.99 5.17 9.28 7.9  8.25]
#  [9.05 7.54 9.78 9.76 9.27]]

y = np.sort(x, axis=1)
print(y)
# 同y = np.sort(x)


dt = np.dtype([('name', 'S10'), ('age', np.int)])
a = np.array([("Mike", 21), ("Nancy", 25), ("Bob", 17), ("Jane", 27)], dtype=dt)
b = np.sort(a, order='name')
print(b)
# [(b'Bob', 17) (b'Jane', 27) (b'Mike', 21) (b'Nancy', 25)]

b = np.sort(a, order='age')
print(b)
# [(b'Bob', 17) (b'Mike', 21) (b'Nancy', 25) (b'Jane', 27)]


import numpy as np
## 一维
np.random.seed(20200612)
x = np.random.randint(0, 10, 10) # 生成整数随机数(0-9的随机10个整数)([low,high),size)
print(x)
# [6 1 8 5 5 4 1 2 9 1]

y = np.argsort(-x)
print(y)
# [8 2 0 3 4 5 7 1 6 9]

print(x[y])
# [9 8 6 5 5 4 2 1 1 1]

## 二维
x = np.random.rand(5, 5) * 10
x = np.around(x, 2)
print(x)
# [[2.32 7.54 9.78 1.73 6.22]
#  [6.93 5.17 9.28 9.76 8.25]
#  [0.01 4.23 0.19 1.73 9.27]
#  [7.99 4.97 0.88 7.32 4.29]
#  [9.05 0.07 8.95 7.9  6.99]]

y = np.argsort(x)
print(y)
# [[3 0 4 1 2]
#  [1 0 4 2 3]
#  [0 2 3 1 4]
#  [2 4 1 3 0]
#  [1 4 3 2 0]]

y = np.argsort(x, axis=0)
print(y)
# [[2 4 2 0 3]
#  [0 2 3 2 0]
#  [1 3 4 3 4]
#  [3 1 1 4 1]
#  [4 0 0 1 2]]

y = np.argsort(x, axis=1)
print(y)
# 同y = np.argsort(x)

# 根据第二列排序
Z = np.random.randint(0,10,(3,3))
print(Z)
print (Z[Z[:,2].argsort()])


import numpy as np
np.random.seed(20200612)
x = np.random.rand(5, 5) * 10
x = np.around(x, 2)
print(x)
# [[2.32 7.54 9.78 1.73 6.22]
#  [6.93 5.17 9.28 9.76 8.25]
#  [0.01 4.23 0.19 1.73 9.27]
#  [7.99 4.97 0.88 7.32 4.29]
#  [9.05 0.07 8.95 7.9  6.99]]

## 一个键
index = np.lexsort([x[:, 0]]) # 按照第一列(即索引为 0 的列)进行排序
print(index)
# [2 0 1 3 4]
y = x[index]
print(y)
# [[0.01 4.23 0.19 1.73 9.27]
#  [2.32 7.54 9.78 1.73 6.22]
#  [6.93 5.17 9.28 9.76 8.25]
#  [7.99 4.97 0.88 7.32 4.29]
#  [9.05 0.07 8.95 7.9  6.99]]

##降序
index = np.lexsort([-1 * x[:, 0]])
print(index)
# [4 3 1 0 2]
y = x[index]
print(y)
# [[9.05 0.07 8.95 7.9  6.99]
#  [7.99 4.97 0.88 7.32 4.29]
#  [6.93 5.17 9.28 9.76 8.25]
#  [2.32 7.54 9.78 1.73 6.22]
#  [0.01 4.23 0.19 1.73 9.27]]

###########################################

## 主键与次键
x=np.array([[1,2,3,4],[1,4,3,3]])
y=np.lexsort((x[0,:],x[1,:])) # 按照第二行进行排序(即索引为 1 的行),若相同,再按照第一行(即索引为 0 的行)进行排序
print(y)
print(x[:,y])


import numpy as np

np.random.seed(100)
x = np.random.randint(1, 30, [8, 3]) # [1-30)的8*3的二维整数数组
print(x)
# [[ 9 25  4]
#  [ 8 24 16]
#  [17 11 21]
#  [ 3 22  3]
#  [ 3 15  3]
#  [18 17 25]
#  [16  5 12]
#  [29 27 17]]

z = np.partition(x, kth=2, axis=0) # 对于每一列，将第 kth=2 小的元素移动到该列的前 kth 个位置（包括第 kth 个位置本身），而较大的元素移动到后面。
print(z)
# [[ 3  5  3]
#  [ 3 11  3]
#  [ 8 15  4]
#  [ 9 22 21]
#  [17 24 16]
#  [18 17 25]
#  [16 25 12]
#  [29 27 17]]

# 【例】选取每一列第三小的数
z = np.partition(x, kth=2, axis=0)
print(z[2])
# [ 8 15  4]

# 【例】选取每一列第三大的数据
z = np.partition(x, kth=-3, axis=0)
print(z[-3])
# [17 24 17]


import numpy as np

np.random.seed(20200612)
x = np.random.rand(5, 5) * 10
x = np.around(x, 2)
print(x)
# [[2.32 7.54 9.78 1.73 6.22]
#  [6.93 5.17 9.28 9.76 8.25]
#  [0.01 4.23 0.19 1.73 9.27]
#  [7.99 4.97 0.88 7.32 4.29]
#  [9.05 0.07 8.95 7.9  6.99]]

y = np.argmax(x)
print(y)  # 2

y = np.argmax(x, axis=0)
print(y)
# [4 0 0 1 2]

y = np.argmax(x, axis=1)
print(y)
# [2 3 4 0 0]


# 一维数组
import numpy as np

x = np.array([0, 2, 3])
print(x)  # [0 2 3]
print(x.shape)  # (3,)
print(x.ndim)  # 1

y = np.nonzero(x)
print(y)  # (array([1, 2], dtype=int64),)
print(np.array(y))  # [[1 2]]
print(np.array(y).shape)  # (1, 2)
print(np.array(y).ndim)  # 2
print(np.transpose(y))
# [[1]
#  [2]]
print(x[np.nonzero(x)])
#[2, 3]

#########################################
# 二维数组

x = np.array([[3, 0, 0], [0, 4, 0], [5, 6, 0]])
print(x)
# [[3 0 0]
#  [0 4 0]
#  [5 6 0]]
print(x.shape)  # (3, 3)
print(x.ndim)  # 2

y = np.nonzero(x)
print(y) # (array([0, 1, 2, 2], dtype=int64), array([0, 1, 0, 1], dtype=int64))
print(np.array(y))
# [[0 1 2 2]
#  [0 1 0 1]] 即索引[0,0][1,1][2,0][2,1]
print(np.array(y).shape)  # (2, 4)
print(np.array(y).ndim)  # 2

y = x[np.nonzero(x)]
print(y)  # [3 4 5 6]

y = np.transpose(np.nonzero(x))
print(y)
# [[0 0]
#  [1 1]
#  [2 0]
#  [2 1]] 即索引[0,0][1,1][2,0][2,1]
#########################################
# 三维数组

x = np.array([[[0, 1], [1, 0]], [[0, 1], [1, 0]], [[0, 0], [1, 0]]])
print(x)
# [[[0 1]
#   [1 0]]
#
#  [[0 1]
#   [1 0]]
#
#  [[0 0]
#   [1 0]]]
print(np.shape(x))  # (3, 2, 2)
print(x.ndim)  # 3

y = np.nonzero(x)
print(np.array(y))
# [[0 0 1 1 2]
#  [0 1 0 1 1]
#  [1 0 1 0 0]]
print(np.array(y).shape)  # (3, 5)
print(np.array(y).ndim)  # 2
print(y)
# (array([0, 0, 1, 1, 2], dtype=int64), array([0, 1, 0, 1, 1], dtype=int64), array([1, 0, 1, 0, 0], dtype=int64))
print(x[np.nonzero(x)])
#[1 1 1 1 1]


# nonzero()将布尔数组转换成整数数组进行操作

import numpy as np

x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(x)
# [[1 2 3]
#  [4 5 6]
#  [7 8 9]]

y = x > 3
print(y)
# [[False False False]
#  [ True  True  True]
#  [ True  True  True]]

y = np.nonzero(x > 3)
print(y)
# (array([1, 1, 1, 2, 2, 2], dtype=int64), array([0, 1, 2, 0, 1, 2], dtype=int64))

y = x[np.nonzero(x > 3)]
print(y)
# [4 5 6 7 8 9]

y = x[x > 3]
print(y)
# [4 5 6 7 8 9]


# 满足条件condition，输出x，不满足输出y
import numpy as np

x = np.array([[0, 1, 2],
              [0, 2, 4],
              [0, 3, 6]])
y = np.where(x < 4, x, -1) # 不满足输出-1
print(y)
# [[ 0  1  2]
#  [ 0  2 -1]
#  [ 0  3 -1]]

# 提取所有奇数
index = np.where(x % 2 == 1) # 这里返回索引
print(x[index])
# [1 3]

#####################################################################################################
# 只有condition，没有x和y，则输出满足条件 (即非0) 元素的坐标 (等价于numpy.nonzero)
# 这里的坐标以tuple的形式给出，通常原数组有多少维，输出的tuple中就包含几个数组，分别对应符合条件元素的各维坐标
x = np.array([1, 2, 3, 4, 5, 6, 7, 8])
y = np.where(x > 5)
print(y)
# (array([5, 6, 7], dtype=int64),)
print(x[y])
# [6 7 8]

y = np.nonzero(x > 5)
print(y)
# (array([5, 6, 7], dtype=int64),)
print(x[y])
# [6 7 8]


import numpy as np

x = np.array([0, 1, 5, 9, 11, 18, 26, 33]) # 必须为升序，否则sorter不应为空

y = np.searchsorted(x, 11)# 返回应该插入位置的索引
print(y)  # 4

y = np.searchsorted(x, 11, side='right')# 插入位置尽可能靠右，适用数组有相同值
print(y)  # 5

##########################################################################
y = np.searchsorted(x, [-1, 0, 11, 15, 33, 35])
print(y)  # [0 0 4 5 7 8]

y = np.searchsorted(x, [-1, 0, 11, 15, 33, 35], side='right')
print(y)  # [0 1 5 5 8 8]

##########################################################################
# sorter不为空的情况：sorter一维数组，对应数组元素的升序索引
np.random.shuffle(x) # 对x随机打乱
print(x)  # [33  1  9 18 11 26  0  5]

x_sort = np.argsort(x)
print(x_sort)  # [6 1 7 2 4 3 5 0]

y = np.searchsorted(x, [-1, 0, 11, 15, 33, 35], sorter=x_sort)
print(y)  # [0 0 4 5 7 8]

y = np.searchsorted(x, [-1, 0, 11, 15, 33, 35], side='right', sorter=x_sort)
print(y)  # [0 1 5 5 8 8]


import numpy as np

x = np.count_nonzero([[0, 1, 7, 0, 0], [3, 0, 0, 2, 19]])
print(x)  # 5

x = np.count_nonzero([[0, 1, 7, 0, 0], [3, 0, 0, 2, 19]], axis=0)
print(x)  # [1 1 1 1 1]

x = np.count_nonzero([[0, 1, 7, 0, 0], [3, 0, 0, 2, 19]], axis=1)
print(x)  # [2 3]


# 找出数组中的唯一值并返回已排序的结果

import numpy as np

x = np.unique([1, 1, 3, 2, 3, 3])
print(x)  # [1 2 3]

x = sorted(set([1, 1, 3, 2, 3, 3])) # set() 转换为集合,将列表 [1, 1, 3, 2, 3, 3] 转换为集合时，重复的元素会被移除
print(x)  # [1, 2, 3]

x = np.array([[1, 1], [2, 3]])
u = np.unique(x)
print(u)  # [1 2 3]

x = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])
y = np.unique(x, axis=0) # 去除重复行
print(y)
# [[1 0 0]
#  [2 3 4]]

x = np.array(['a', 'b', 'b', 'c', 'a'])
u, index = np.unique(x, return_index=True)
print(u)  # ['a' 'b' 'c']
print(index)  # [0 1 3]
print(x[index])  # ['a' 'b' 'c']

x = np.array([1, 2, 6, 4, 2, 3, 2])
u, index = np.unique(x, return_inverse=True)
print(u)  # [1 2 3 4 6]
print(index)  # [0 1 4 3 1 2 1]
print(u[index])  # [1 2 6 4 2 3 2]

u, count = np.unique(x, return_counts=True)
print(u)  # [1 2 3 4 6]
print(count)  # [1 3 1 1 1]


import numpy as np

test = np.array([0, 1, 2, 5, 0])
states = [0, 2]
mask = np.in1d(test, states)
print(mask)  # [ True False  True False  True]
print(test[mask])  # [0 2 0]

mask = np.in1d(test, states, invert=True) # 反转为不存在为True
print(mask)  # [False  True False  True False]
print(test[mask])  # [1 5]


# 求两个数组的唯一化+求交集+排序函数

import numpy as np
from functools import reduce

x = np.intersect1d([1, 3, 4, 3], [3, 1, 2, 1])
print(x)  # [1 3]

x = np.array([1, 1, 2, 3, 4])
y = np.array([2, 1, 4, 6])
xy, x_ind, y_ind = np.intersect1d(x, y, return_indices=True) # 返回索引
# x_ind 是交集元素在数组 x 中的索引
# y_ind 是交集元素在数组 y 中的索引
print(xy)  # [1 2 4]
print(x_ind)  # [0 2 4]
print(y_ind)  # [1 0 2]
print(x[x_ind])  # [1 2 4]
print(y[y_ind])  # [1 2 4]

x = reduce(np.intersect1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))
print(x)  # [3] 
# reduce 函数会对 arrays 中的数组依次应用 np.intersect1d,这里相当于找出所有集合的交集


import numpy as np
from functools import reduce

x = np.union1d([-1, 0, 1], [-2, 0, 2])
print(x)  # [-2 -1  0  1  2]
x = reduce(np.union1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))
print(x)  # [1 2 3 4 6]


import numpy as np

a = np.array([1, 2, 3, 2, 4, 1])
b = np.array([3, 4, 5, 6])
x = np.setdiff1d(a, b)
print(x)  # [1 2]


import numpy as np

a = np.array([1, 2, 3, 2, 4, 1])
b = np.array([3, 4, 5, 6])
x = np.setxor1d(a, b)
print(x)  # [1 2 5 6]


import numpy as np

file = r'.\test.npy'
np.random.seed(20200619)
x = np.random.uniform(low=0, high=1,size = [3, 5])
np.save(file, x)
y = np.load(file)
print(y)
# [[0.01123594 0.66790705 0.50212171 0.7230908  0.61668256]
#  [0.00668332 0.1234096  0.96092409 0.67925305 0.38596837]
#  [0.72342998 0.26258324 0.24318845 0.98795012 0.77370715]]


import numpy as np

file = r'.\test.npz'
x = np.linspace(0, np.pi, 5)
y = np.sin(x)
z = np.cos(x)
np.savez(file, x, y, z_d=z)
data = np.load(file)
np.set_printoptions(suppress=True)
print(data.files)  
# ['z_d', 'arr_0', 'arr_1']

print(data['arr_0'])
# [0.         0.78539816 1.57079633 2.35619449 3.14159265]

print(data['arr_1'])
# [0.         0.70710678 1.         0.70710678 0.        ]

print(data['z_d'])
# [ 1.          0.70710678  0.         -0.70710678 -1.        ]

# 用解压软件打开 test.npz 文件，会发现其中有三个文件：arr_0.npy,arr_1.npy,z_d.npy，其中分别保存着数组x,y,z的内容。


import numpy as np

file = r'.\test.txt'
x = np.arange(0, 10).reshape(2, -1)
np.savetxt(file, x)
y = np.loadtxt(file)
print(y)
# [[0. 1. 2. 3. 4.]
#  [5. 6. 7. 8. 9.]]

# test.txt文件如下：
# 0.000000000000000000e+00 1.000000000000000000e+00 2.000000000000000000e+00 3.000000000000000000e+00 4.000000000000000000e+00
# 5.000000000000000000e+00 6.000000000000000000e+00 7.000000000000000000e+00 8.000000000000000000e+00 9.000000000000000000e+00

file = r'.\test.csv'
x = np.arange(0, 10, 0.5).reshape(4, -1)
np.savetxt(file, x, fmt='%.3f', delimiter=',')
y = np.loadtxt(file, delimiter=',')
print(y)
# [[0.  0.5 1.  1.5 2. ]
#  [2.5 3.  3.5 4.  4.5]
#  [5.  5.5 6.  6.5 7. ]
#  [7.5 8.  8.5 9.  9.5]]

# test.csv文件如下：
# 0.000,0.500,1.000,1.500,2.000
# 2.500,3.000,3.500,4.000,4.500
# 5.000,5.500,6.000,6.500,7.000
# 7.500,8.000,8.500,9.000,9.500


## data.csv文件（不带缺失值）
# id,value1,value2,value3
# 1,123,1.4,23
# 2,110,0.5,18
# 3,164,2.1,19
import numpy as np

file = r'.\data.csv'
x = np.loadtxt(file, delimiter=',', skiprows=1, usecols=(1, 2)) #跳过第一行,选择二三列
print(x)
# [[123.    1.4]
#  [110.    0.5]
#  [164.    2.1]]

val1, val2 = np.loadtxt(file, delimiter=',', skiprows=1, usecols=(1, 2), unpack=True)
print(val1)  # [123. 110. 164.]
print(val2)  # [1.4 0.5 2.1]

x = np.genfromtxt(file, delimiter=',', names=True)
print(x)
# [(1., 123., 1.4, 23.) (2., 110., 0.5, 18.) (3., 164., 2.1, 19.)]

print(type(x))  
# <class 'numpy.ndarray'>

print(x.dtype)
# [('id', '<f8'), ('value1', '<f8'), ('value2', '<f8'), ('value3', '<f8')]

print(x['id'])  # [1. 2. 3.]
print(x['value1'])  # [123. 110. 164.]
print(x['value2'])  # [1.4 0.5 2.1]
print(x['value3'])  # [23. 18. 19.]

## data1.csv文件（带有缺失值）
# id,value1,value2,value3
# 1,123,1.4,23
# 2,110,,18
# 3,,2.1,19

file = r'.\data1.csv'
x = np.genfromtxt(file, delimiter=',', names=True)
print(x)
# [(1., 123., 1.4, 23.) (2., 110., nan, 18.) (3.,  nan, 2.1, 19.)]

print(type(x))  
# <class 'numpy.ndarray'>

print(x.dtype)
# [('id', '<f8'), ('value1', '<f8'), ('value2', '<f8'), ('value3', '<f8')]

print(x['id'])  # [1. 2. 3.]
print(x['value1'])  # [123. 110.  nan]
print(x['value2'])  # [1.4 nan 2.1]
print(x['value3'])  # [23. 18. 19.]


import numpy as np

np.set_printoptions(precision=4) # 小数位为4
x = np.array([1.123456789])
print(x)  # [1.1235]

np.set_printoptions(threshold=20) # 显示的阈值
x = np.arange(50)
print(x)  # [ 0  1  2 ... 47 48 49]

np.set_printoptions(threshold=np.iinfo(np.int).max) # 显示所有元素,可以看作固定搭配
print(x)
# [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
#  24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
#  48 49]

eps = np.finfo(float).eps # 返回浮点数类型 float 的机器精度，即两个相邻浮点数之间的最小间隔。对于大多数系统约为 2.220446049250313e-16
x = np.arange(4.)
x = x ** 2 - (x + eps) ** 2
print(x)  
# [-4.9304e-32 -4.4409e-16  0.0000e+00  0.0000e+00]

np.set_printoptions(suppress=True) # 不使用科学计数法
print(x)  # [-0. -0.  0.  0.]

x = np.linspace(0, 10, 10)
print(x)
# [ 0.      1.1111  2.2222  3.3333  4.4444  5.5556  6.6667  7.7778  8.8889
#  10.    ]
np.set_printoptions(precision=2, suppress=True, threshold=5)
print(x)  # [ 0.    1.11  2.22 ...  7.78  8.89 10.  ]

np.set_printoptions(formatter={'all': lambda x: 'int: ' + str(-x)}) #将输入值 x 转换为字符串，并在前面加上 'int: '，同时将 x 取反
x = np.arange(3)
print(x)  # [int: 0 int: -1 int: -2]

np.set_printoptions()  # formatter 重置了
print(x)  # [0 1 2]

# 恢复默认选项
np.set_printoptions(edgeitems=3, infstr='inf', linewidth=75,
                    nanstr='nan', precision=8, suppress=False, 
                    threshold=1000, formatter=None)


import numpy as np

x = np.get_printoptions()
print(x)
# {
# 'edgeitems': 3, 
# 'threshold': 1000, 
# 'floatmode': 'maxprec', 
# 'precision': 8, 
# 'suppress': False, 
# 'linewidth': 75, 
# 'nanstr': 'nan', 
# 'infstr': 'inf', 
# 'sign': '-', 
# 'formatter': None, 
# 'legacy': False
# }


# 野外正在进行9（n=9）口石油勘探井的发掘工作，每一口井能够开发出油的概率是0.1（p=0.1）。请问，最终所有的勘探井都勘探失败的概率？

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

np.random.seed(20200605)
n = 9# 做某件事情的次数
p = 0.1# 做某件事情成功的概率
size = 50000
x = np.random.binomial(n, p, size)
'''或者使用binom.rvs
#使用binom.rvs(n, p, size=1)函数模拟一个二项随机变量,可视化地表现概率
y = stats.binom.rvs(n, p, size=size)#返回一个numpy.ndarray
'''
print(np.sum(x == 0) / size)  # 0.3897

plt.hist(x)
plt.xlabel('随机变量：成功次数')
plt.ylabel('样本中出现的次数')
plt.show()
#它返回一个列表，列表中每个元素表示随机变量中对应值的概率
s = stats.binom.pmf(range(10), n, p)
print(np.around(s, 3))
# [0.387 0.387 0.172 0.045 0.007 0.001 0.    0.    0.    0.   ]


# 假定某航空公司预定票处平均每小时接到42次订票电话，那么10分钟内恰好接到6次电话的概率是多少？

import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

np.random.seed(20200605)
lam = 42 / 6# 平均值：平均每十分钟接到42/6次订票电话
size = 50000
x = np.random.poisson(lam, size)
'''或者
#模拟服从泊松分布的50000个随机变量
x = stats.poisson.rvs(lam,size=size)
'''
print(np.sum(x == 6) / size)  # 0.14988

plt.hist(x)
plt.xlabel('随机变量：每十分钟接到订票电话的次数')
plt.ylabel('50000个样本中出现的次数')
plt.show()
#用poisson.pmf(k, mu)求对应分布的概率:概率质量函数 (PMF)
x = stats.poisson.pmf(6, lam)
print(x)  # 0.14900277967433773


# 一共20只动物里有7只是狗，抽取12只有3只狗的概率（无放回抽样）。

import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

np.random.seed(20200605)
size = 500000
x = np.random.hypergeometric(ngood=7, nbad=13, nsample=12, size=size)
'''或者
#用rvs(M, n, N, loc=0, size=1, random_state=None)模拟
x = stats.hypergeom.rvs(M=20,n=7,N=12,size=size)
'''
print(np.sum(x == 3) / size)  # 0.198664

plt.hist(x, bins=8)
plt.xlabel('狗的数量')
plt.ylabel('50000个样本中出现的次数')
plt.title('超几何分布',fontsize=20)
plt.show()

"""
M 为总体容量
n 为总体中具有成功标志的元素的个数
N,k 表示抽取N个元素有k个是成功元素
"""
x = range(8)
#用hypergeom.pmf(k, M, n, N, loc)来计算k次成功的概率
s = stats.hypergeom.pmf(k=x, M=20, n=7, N=12)
print(np.round(s, 3))
# [0.    0.004 0.048 0.199 0.358 0.286 0.095 0.01 ]


# 在low到high范围内，创建大小为size的均匀分布的随机数。

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

np.random.seed(20200614)
a = 0
b = 100
size = 50000
x = np.random.uniform(a, b, size=size)
print(np.all(x >= 0))  # True
print(np.all(x < 100))  # True
y = (np.sum(x < 50) - np.sum(x < 10)) / size
print(y)  # 0.40144

plt.hist(x, bins=20)
plt.show()

a = stats.uniform.cdf(10, 0, 100)
b = stats.uniform.cdf(50, 0, 100)
print(b - a)  # 0.4


# 根据指定大小产生满足标准正态分布的数组（均值为0，标准差为1）

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

np.random.seed(20200614)
size = 50000
x = np.random.randn(size)
y1 = (np.sum(x < 1) - np.sum(x < -1)) / size
y2 = (np.sum(x < 2) - np.sum(x < -2)) / size
y3 = (np.sum(x < 3) - np.sum(x < -3)) / size
print(y1)  # 0.68596
print(y2)  # 0.95456
print(y3)  # 0.99744

plt.hist(x, bins=20)
plt.show()

y1 = stats.norm.cdf(1) - stats.norm.cdf(-1)
y2 = stats.norm.cdf(2) - stats.norm.cdf(-2)
y3 = stats.norm.cdf(3) - stats.norm.cdf(-3)
print(y1)  # 0.6826894921370859
print(y2)  # 0.9544997361036416
print(y3)  # 0.9973002039367398


# scale = 1/lambda

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

np.random.seed(20200614)
lam = 7
size = 50000
x = np.random.exponential(1 / lam, size)
'''或者
#rvs(loc=0, scale=1/lam, size=size, random_state=None)模拟
'''
y1 = (np.sum(x < 1 / 7)) / size
y2 = (np.sum(x < 2 / 7)) / size
y3 = (np.sum(x < 3 / 7)) / size
print(y1)  # 0.63218
print(y2)  # 0.86518
print(y3)  # 0.95056

plt.hist(x, bins=20)
plt.show()

y1 = stats.expon.cdf(1 / 7, scale=1 / lam)
y2 = stats.expon.cdf(2 / 7, scale=1 / lam)
y3 = stats.expon.cdf(3 / 7, scale=1 / lam)
print(y1)  # 0.6321205588285577
print(y2)  # 0.8646647167633873
print(y3)  # 0.950212931632136


import numpy as np

np.random.seed(20200614)
x = np.random.choice(10, 3)
print(x)  # [2 0 1]

x = np.random.choice(10, 3, p=[0.05, 0, 0.05, 0.9, 0, 0, 0, 0, 0, 0])
print(x)  # [3 2 3]

x = np.random.choice(10, 3, replace=False, p=[0.05, 0, 0.05, 0.9, 0, 0, 0, 0, 0, 0])
print(x)  # [3 0 2]

aa_milne_arr = ['pooh', 'rabbit', 'piglet', 'Christopher']
x = np.random.choice(aa_milne_arr, 5, p=[0.5, 0.1, 0.1, 0.3])
print(x) # ['pooh' 'rabbit' 'pooh' 'pooh' 'pooh']


import numpy as np

np.random.seed(20200614)
x = np.arange(10)
np.random.shuffle(x)
print(x) # 改变原来的数组
# [6 8 7 5 3 9 1 4 0 2]

print(np.random.shuffle(x))
# None

x = np.arange(20).reshape((5, 4))
np.random.shuffle(x)
print(x) # 只沿第 0 轴洗牌
# [[ 4  5  6  7]
#  [ 0  1  2  3]
#  [ 8  9 10 11]
#  [16 17 18 19]
#  [12 13 14 15]]


import numpy as np

np.random.seed(20200614)
x = np.arange(10)
y = np.random.permutation(x)
print(y)
# [6 8 7 5 3 9 1 4 0 2]

z=[1, 4, 9, 12, 15]
print(np.random.permutation(z))
# [ 4  1  9 15 12]
print(z) 
# [1, 4, 9, 12, 15] 不会改变原来的数组


import numpy as np

x = np.array([[11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20],
              [21, 22, 23, 24, 25],
              [26, 27, 28, 29, 30],
              [31, 32, 33, 34, 35]])
y = np.amin(x)
print(y)  # 11

y = np.amin(x, axis=0)
print(y)  # [11 12 13 14 15]

y = np.amin(x, axis=1)
print(y)  # [11 16 21 26 31]


import numpy as np

np.random.seed(20200623)
x = np.random.randint(0, 20, size=[4, 5])
print(x)
# [[10  2  1  1 16]
#  [18 11 10 14 10]
#  [11  1  9 18  8]
#  [16  2  0 15 16]]

print(np.ptp(x))  # 18
print(np.ptp(x, axis=0))  # [ 8 10 10 17  8]
print(np.ptp(x, axis=1))  # [15  8 17 16]


import numpy as np

np.random.seed(20200623)
x = np.random.randint(0, 20, size=[4, 5])
print(x)
# [[10  2  1  1 16]
#  [18 11 10 14 10]
#  [11  1  9 18  8]
#  [16  2  0 15 16]]

print(np.percentile(x, [25, 50]))  
# [ 2. 10.]

print(np.percentile(x, [25, 50], axis=0))
# [[10.75  1.75  0.75 10.75  9.5 ]
#  [13.5   2.    5.   14.5  13.  ]]

print(np.percentile(x, [25, 50], axis=1))
# [[ 1. 10.  8.  2.]
#  [ 2. 11.  9. 15.]]


import numpy as np

np.random.seed(20200623)
x = np.random.randint(0, 20, size=[4, 5])
print(x)
# [[10  2  1  1 16]
#  [18 11 10 14 10]
#  [11  1  9 18  8]
#  [16  2  0 15 16]]
print(np.percentile(x, 50))
print(np.median(x))
# 10.0

print(np.percentile(x, 50, axis=0))
print(np.median(x, axis=0))
# [13.5  2.   5.  14.5 13. ]


import numpy as np

x = np.array([[11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20],
              [21, 22, 23, 24, 25],
              [26, 27, 28, 29, 30],
              [31, 32, 33, 34, 35]])
y = np.mean(x)
print(y)  # 23.0

y = np.mean(x, axis=1)
print(y)  # [13. 18. 23. 28. 33.]


# 将各数值乘以相应的权数，然后加总求和得到总体值，再除以总的单位数
# 不指定权重的时候average和mean是一样的
import numpy as np

x = np.array([[11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20],
              [21, 22, 23, 24, 25],
              [26, 27, 28, 29, 30],
              [31, 32, 33, 34, 35]])

# 权重
y = np.arange(1, 26).reshape([5, 5])
# [[ 1  2  3  4  5]
#  [ 6  7  8  9 10]
#  [11 12 13 14 15]
#  [16 17 18 19 20]
#  [21 22 23 24 25]]

z = np.average(x, weights=y)
print(z)  # 27.0

z = np.average(x, axis=0, weights=y) # 沿着 axis=0（即每一列）计算加权平均值
print(z)
# [25.54545455 26.16666667 26.84615385 27.57142857 28.33333333]
# 乘积之和：
# 11×1+16×6+21×11+26×16+31×21
# =11+96+231+416+651=1405
# 权重之和：
# 1+6+11+16+21=55
# 加权平均：
# 1405/55≈25.54545455


import numpy as np

x = np.array([[11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20],
              [21, 22, 23, 24, 25],
              [26, 27, 28, 29, 30],
              [31, 32, 33, 34, 35]])
y = np.var(x)
print(y)  # 52.0
y = np.mean((x - np.mean(x)) ** 2)
print(y)  # 52.0

y = np.var(x, ddof=1) # ddof=1 表示计算样本方差（即除以 n - 1，n 是元素总数）
print(y)  # 54.166666666666664
y = np.sum((x - np.mean(x)) ** 2) / (x.size - 1)
print(y)  # 54.166666666666664

y = np.var(x, axis=0)
print(y)  # [50. 50. 50. 50. 50.]


import numpy as np

x = np.array([[11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20],
              [21, 22, 23, 24, 25],
              [26, 27, 28, 29, 30],
              [31, 32, 33, 34, 35]])
y = np.std(x)
print(y)  # 7.211102550927978
y = np.sqrt(np.var(x))
print(y)  # 7.211102550927978

y = np.std(x, axis=0)
print(y)
# [7.07106781 7.07106781 7.07106781 7.07106781 7.07106781]


import numpy as np

x = [1, 2, 3, 4, 6]
y = [0, 2, 5, 6, 7]
print(np.cov(x))  # 3.7   #样本方差(即除以 n - 1)  等价于print(np.var(x,ddof=1))
print(np.cov(y))  # 8.5   #样本方差
print(np.cov(x, y)) # 协方差矩阵
# [[3.7  5.25]
#  [5.25 8.5 ]]
# 对角线元素是 x 和 y 的样本方差
# 非对角线元素是 x 和 y 的样本协方差


# 计算示例如下：
print(np.var(x))  # 2.96    #方差
print(np.var(x, ddof=1))  # 3.7    #样本方差
print(np.var(y))  # 6.8    #方差
print(np.var(y, ddof=1))  # 8.5    #样本方差

z = np.mean((x - np.mean(x)) * (y - np.mean(y)))    #协方差
print(z)  # 4.2

z = np.sum((x - np.mean(x)) * (y - np.mean(y))) / (len(x) - 1)   #样本协方差
print(z)  # 5.25

z = np.dot(x - np.mean(x), y - np.mean(y)) / (len(x) - 1)     #样本协方差     
print(z)  # 5.25


import numpy as np

np.random.seed(20200623)
x, y = np.random.randint(0, 20, size=(2, 4))
print(x)  # [10  2  1  1]
print(y)  # [16 18 11 10]

z = np.corrcoef(x, y)
print(z)
# [[1.         0.48510096]
#  [0.48510096 1.        ]]
# 对角线元素是 x 和 y 与自身的相关系数（始终为 1）
# 非对角线元素是 x 和 y 的相关系数


# 计算示例如下：
a = np.dot(x - np.mean(x), y - np.mean(y))              # 计算协方差
b = np.sqrt(np.dot(x - np.mean(x), x - np.mean(x)))     # 计算 x 的标准差
c = np.sqrt(np.dot(y - np.mean(y), y - np.mean(y)))
print(a / (b * c))  # 0.4851009629263671                # 相关系数


import numpy as np

x = np.array([0.2, 6.4, 3.0, 1.6]) # 元素
bins = np.array([0.0, 1.0, 2.5, 4.0, 10.0]) # 4个区间
inds = np.digitize(x, bins)
print(inds)  # [1 4 3 2] 每个元素所属的区间的索引
for n in range(x.size):
    print(bins[inds[n] - 1], "<=", x[n], "<", bins[inds[n]])
# 0.0 <= 0.2 < 1.0
# 4.0 <= 6.4 < 10.0
# 2.5 <= 3.0 < 4.0
# 1.0 <= 1.6 < 2.5


x = np.array([1.2, 10.0, 12.4, 15.5, 20.])
bins = np.array([0, 5, 10, 15, 20])
inds = np.digitize(x, bins, right=True) # 区间是左开右闭的
print(inds)  # [1 2 3 4 4]
# 10.0 落在 (5, 10] 索引为 2
# 20.0 落在 (15, 20] 索引为 4

inds = np.digitize(x, bins, right=False)
print(inds)  # [1 3 3 4 5]
# 10.0 落在 [10, 15) 索引为 3


import numpy as np

x = np.array([1, 2, 3, 4, 5])
y = np.array([2, 3, 4, 5, 6])
z = np.dot(x, y) # 1*2+2*3+3*4...
print(z)  # 70

x = np.array([[1, 2, 3], [3, 4, 5], [6, 7, 8]])
# [[1 2 3]
#  [3 4 5]
#  [6 7 8]]
y = np.array([[5, 4, 2], [1, 7, 9], [0, 4, 5]])
# [[5 4 2]
#  [1 7 9]
#  [0 4 5]]
z = np.dot(x, y)
print(z) # z[0, 0]是 x 的第 0 行与 y 的第 0 列的点积=1*5+2*1+3*0=7
# [[  7  30  35]
#  [ 19  60  67]
#  [ 37 105 115]]

z = np.dot(y, x)
print(z)
# [[ 29  40  51]
#  [ 76  93 110]
#  [ 42  51  60]]


import numpy as np
# 创建一个对角矩阵
x = np.diag((1, 2, 3))  
# [[1 0 0]
#  [0 2 0]
#  [0 0 3]]

print(np.linalg.eigvals(x))
# [1. 2. 3.]
a, b = np.linalg.eig(x)  
# 特征值保存在a中，特征向量保存在b中
print(a)
# [1. 2. 3.]
print(b)
# [[1. 0. 0.]
#  [0. 1. 0.]
#  [0. 0. 1.]]

# 检验特征值与特征向量是否正确
for i in range(3): 
    if np.allclose(a[i] * b[:, i], np.dot(x, b[:, i])):
        print('Right')
    else:
        print('Error')
# Right
# Right
# Right


# 判断对称阵是否为正定阵（特征值是否全部为正）
import numpy as np

A = np.arange(16).reshape(4, 4)
# [[ 0  1  2  3]
#  [ 4  5  6  7]
#  [ 8  9 10 11]
#  [12 13 14 15]]

A = A + A.T  # 将方阵转换成对称阵
# [[ 0  5 10 15]
#  [ 5 10 15 20]
#  [10 15 20 25]
#  [15 20 25 30]]

B = np.linalg.eigvals(A)  # 求A的特征值
print(B)
# [ 6.74165739e+01 -7.41657387e+00  1.82694656e-15 -1.72637110e-15]

# 判断是不是所有的特征值都大于0，用到了all函数，显然对称阵A不是正定的
if np.all(B > 0):
    print('Yes')
else:
    print('No')
# No


import numpy as np

A = np.array([[4, 11, 14], [8, 7, -2]])
# [[ 4 11 14]
#  [ 8  7 -2]] M=2,N=3

u, s, vh = np.linalg.svd(A, full_matrices=False) # K=2(M<N)
print(u.shape)  # (2, 2)=(M,K)
print(u)
# [[-0.9486833  -0.31622777]
#  [-0.31622777  0.9486833 ]]

print(s.shape)  # (2,)
print(np.diag(s))
# [[18.97366596  0.        ]
#  [ 0.          9.48683298]]

print(vh.shape)  # (2, 3)=(K,N)
print(vh)
# [[-0.33333333 -0.66666667 -0.66666667]
#  [ 0.66666667  0.33333333 -0.66666667]]

a = np.dot(u, np.diag(s))
a = np.dot(a, vh)
print(a)
# [[ 4. 11. 14.]
#  [ 8.  7. -2.]]


import numpy as np
A = np.array([[2, -2, 3], [1, 1, 1], [1, 3, -1]])
# [[ 2 -2  3]
#  [ 1  1  1]
#  [ 1  3 -1]] M=3,N=3

q, r = np.linalg.qr(A)
print(q.shape)  # (3, 3)=(M,N)
print(q)
# [[-0.81649658  0.53452248  0.21821789]
#  [-0.40824829 -0.26726124 -0.87287156]
#  [-0.40824829 -0.80178373  0.43643578]]

print(r.shape)  # (3, 3)=(N,N)
print(r)
# [[-2.44948974  0.         -2.44948974]
#  [ 0.         -3.74165739  2.13808994]
#  [ 0.          0.         -0.65465367]]

print(np.dot(q, r))
# [[ 2. -2.  3.]
#  [ 1.  1.  1.]
#  [ 1.  3. -1.]]

a = np.allclose(np.dot(q.T, q), np.eye(3))
print(a)  # True 说明q为正交矩阵


import numpy as np

A = np.array([[1, 1, 1, 1], [1, 3, 3, 3],
              [1, 3, 5, 5], [1, 3, 5, 7]])
print(A)
# [[1 1 1 1]
#  [1 3 3 3]
#  [1 3 5 5]
#  [1 3 5 7]]

print(np.linalg.eigvals(A))
# [13.13707118  1.6199144   0.51978306  0.72323135]

L = np.linalg.cholesky(A)
print(L)
# [[1.         0.         0.         0.        ]
#  [1.         1.41421356 0.         0.        ]
#  [1.         1.41421356 1.41421356 0.        ]
#  [1.         1.41421356 1.41421356 1.41421356]]

print(np.dot(L, L.T))
# [[1. 1. 1. 1.]
#  [1. 3. 3. 3.]
#  [1. 3. 5. 5.]
#  [1. 3. 5. 7.]]


# 求向量的范数
import numpy as np
x = np.array([1, 2, 3, 4])

# ord=1
print(np.linalg.norm(x, ord=1)) 
# 10.0  |1|+|2|+|3|+|4|=10
print(np.sum(np.abs(x)))  
# 10

# ord=2
print(np.linalg.norm(x, ord=2))  
# 5.477225575051661 根号下平方和
print(np.sqrt(np.sum(np.abs(x) ** 2)))  
# 5.477225575051661

# ord=np.inf
print(np.linalg.norm(x, ord=-np.inf))  
# 1.0   min(|1|,|2|,|3|,|4|)=1
print(np.min(np.abs(x)))  
# 1

print(np.linalg.norm(x, ord=np.inf))  
# 4.0    max(|1|,|2|,|3|,|4|)=4
print(np.max(np.abs(x)))  
# 4


# 求矩阵的范数
import numpy as np
A = np.array([[1, 2, 3, 4], [2, 3, 5, 8],
              [1, 3, 5, 7], [3, 4, 7, 11]])
# [[ 1  2  3  4]
#  [ 2  3  5  8]
#  [ 1  3  5  7]
#  [ 3  4  7 11]]

# ord=1
print(np.linalg.norm(A, ord=1))  # 30.0
print(np.max(np.sum(A, axis=0)))  # 30

# ord=2
print(np.linalg.norm(A, ord=2))  
# 20.24345358700576
print(np.max(np.linalg.svd(A, compute_uv=False)))  
# 20.24345358700576

# ord=inf
print(np.linalg.norm(A, ord=np.inf))  # 25.0
print(np.max(np.sum(A, axis=1)))  # 25

# ord='fro'
print(np.linalg.norm(A, ord='fro'))  
# 20.273134932713294
print(np.sqrt(np.trace(np.dot(A.T, A))))  
# 20.273134932713294


import numpy as np

x = np.array([[1, 2], [3, 4]])
# [[1 2]
#  [3 4]]

print(np.linalg.det(x))
# -2.0000000000000004


import numpy as np

I = np.eye(3)  # 先创建一个单位阵
# [[1. 0. 0.]
#  [0. 1. 0.]
#  [0. 0. 1.]]

r = np.linalg.matrix_rank(I)
print(r)  # 3

I[1, 1] = 0  # 将该元素置为0
print(I)
# [[1. 0. 0.]
#  [0. 0. 0.]
#  [0. 0. 1.]]

r = np.linalg.matrix_rank(I)  # 此时秩变成2
print(r)  # 2


import numpy as np

x = np.array([[1, 2, 3], [3, 4, 5], [6, 7, 8]])
# [[1 2 3]
#  [3 4 5]
#  [6 7 8]]

y = np.array([[5, 4, 2], [1, 7, 9], [0, 4, 5]])
# [[5 4 2]
#  [1 7 9]
#  [0 4 5]]

print(np.trace(x))  # A的迹等于A.T的迹
# 13
print(np.trace(np.transpose(x)))
# 13

print(np.trace(x + y))  # 和的迹 等于 迹的和
# 30
print(np.trace(x) + np.trace(y))
# 30


import numpy as np

A = np.array([[1, -2, 1], [0, 2, -1], [1, 1, -2]])
# [[ 1 -2  1]
#  [ 0  2 -1]
#  [ 1  1 -2]]

# 求A的行列式，不为零则存在逆矩阵
A_det = np.linalg.det(A)  
print(A_det)
# -2.9999999999999996

A_inverse = np.linalg.inv(A)  # 求A的逆矩阵
print(A_inverse)
# [[ 1.00000000e+00  1.00000000e+00 -1.11022302e-16]
#  [ 3.33333333e-01  1.00000000e+00 -3.33333333e-01]
#  [ 6.66666667e-01  1.00000000e+00 -6.66666667e-01]]

x = np.allclose(np.dot(A, A_inverse), np.eye(3))
print(x)  # True
x = np.allclose(np.dot(A_inverse, A), np.eye(3))
print(x)  # True

A_companion = A_inverse * A_det  # 求A的伴随矩阵
print(A_companion)
# [[-3.00000000e+00 -3.00000000e+00  3.33066907e-16]
#  [-1.00000000e+00 -3.00000000e+00  1.00000000e+00]
#  [-2.00000000e+00 -3.00000000e+00  2.00000000e+00]]


#  x + 2y +  z = 7
# 2x -  y + 3z = 7
# 3x +  y + 2z =18

import numpy as np

A = np.array([[1, 2, 1], [2, -1, 3], [3, 1, 2]])
b = np.array([7, 7, 18])
x = np.linalg.solve(A, b)
print(x)  # [ 7.  1. -2.]

x = np.linalg.inv(A).dot(b)
print(x)  # [ 7.  1. -2.]

y = np.allclose(np.dot(A, x), b)
print(y)  # True


from sklearn import datasets
import numpy as np 

iris=datasets.load_iris()
# 求出鸢尾属植物萼片长度的平均值、中位数和标准差（第1列，sepallength）
sepal=iris.data[:,0] # 第1列 萼片长度
print(f'平均值:{np.mean(sepal):.2f}')
print(f'中位数:{np.median(sepal)}')
print(f'标准差:{np.std(sepal)}')

# 创建一种标准化形式的鸢尾属植物萼片长度，其值正好介于0和1之间，这样最小值为0，最大值为1（第1列，sepallength）
sepal_min=np.min(sepal)
sepal_max=np.max(sepal)
normalized_sepal_length = (sepal-sepal_min) / (sepal_max - sepal_min)
# print(sepal_max)
# print(sepal_min)
# print("标准化后的萼片长度:", normalized_sepal_length)

# 找到鸢尾属植物萼片长度的第5和第95百分位数（第1列，sepallength）
print(f'第5和第95百分位数:{np.percentile(sepal,5)}和{np.percentile(sepal,95)}')

# 把iris_data数据集中的20个随机位置修改为np.nan值
np.random.seed(42)

# 在iris_data的sepallength中查找缺失值的个数和位置（第1列）
count=np.isnan(sepal).sum()
indices=np.where(np.isnan(sepal))[0]
print(f"缺失值的个数: {count}")
print(f"缺失值的位置（索引）: {indices}")

#  筛选具有 sepallength（第1列）< 5.0 并且 petallength（第3列）> 1.5 的 iris_data行
petal=iris.data[:,2]
condition=(sepal<5.0)&(petal>1.5)
print(np.where(condition))
# np.where 返回一个元组，因此使用 [0] 来获取第一个元素，即满足条件的行的索引
indices=np.where(condition)[0]
print(indices)
print(iris.data[indices])

#  选择没有任何 nan 值的 iris_data行
mask = ~np.isnan(iris.data).any(axis=1)
# np.isnan(iris.data) True 表示对应位置的元素是 np.nan
# .any(axis=1) 对于每一行，它会检查该行中是否有任何 True
# ~ 取反
rows = iris.data[mask]
print("没有 np.nan 值的行数:", len(rows))
# print("没有 np.nan 值的行数据:")
# print(rows)

# 计算 iris_data 中sepalLength（第1列）和petalLength（第3列）之间的相关系数
print(f'相关系数:{np.corrcoef(sepal,petal)}')

# 找出iris_data是否有任何缺失值
values = np.isnan(iris.data).any() 
print(values)
if values:
    print("数据集中存在缺失值。")
else:
    print("数据集中没有缺失值。")

#  在numpy数组中将所有出现的nan替换为0
iris.data[np.isnan(iris.data)]=0

# 将 iris_data 的花瓣长度（第3列）以形成分类变量的形式显示。定义：Less than 3 --> 'small'；3-5 --> 'medium'；'>=5 --> 'large'
petal_categories = np.where(petal < 3, 'small',np.where((petal >= 3) & (petal < 5), 'medium', 'large'))
print("花瓣长度分类:")
print(petal_categories)

# 根据 sepallength 列对数据集进行排序
sorted_indices = np.argsort(iris.data[:,0])
# 特征（数据）和标签（目标）通常需要分别排序
sorted_data = iris.data[sorted_indices]
sorted_target = iris.target[sorted_indices]

# 在鸢尾属植物数据集中找到最常见的花瓣长度值(第3列)
values, counts = np.unique(iris.data, return_counts=True)
max_count_index = np.argmax(counts)
print(f"最常见的花瓣长度值是: {values[max_count_index]} cm")
print(f"该值出现了 {counts[max_count_index]} 次")

# 在鸢尾花数据集的 petalwidth（第4列）中查找第一次出现的值大于1.0的位置
petal_width=iris.data[:,3]
# 找到第一个大于1.0的值的索引
first_index = np.where(petal_width > 1.0)[0][0]
print(f"第一次出现 petal width 大于1.0的位置是索引: {first_index}")
print(f"对应的 petal width 值是: {petal_width[first_index]}")

字符	对应类型	备注
b	boolean	'b1'
i	signed integer	'i1', 'i2', 'i4', 'i8'
u	unsigned integer	'u1', 'u2', 'u4', 'u8'
f	floating-point	'f2', 'f4', 'f8'
c	complex floating-point
m	timedelta64	表示两个时间之间的间隔
M	datetime64	日期时间类型
O	object
S	(byte-)string	S3表示长度为3的字符串
U	Unicode	Unicode 字符串
V	void

日期单位	代码含义	时间单位	代码含义
Y	年	h	小时
M	月	m	分钟
W	周	s	秒
D	天	ms	毫秒
-	-	us	微秒
-	-	ns	纳秒
-	-	ps	皮秒
-	-	fs	飞秒
-	-	as	阿托秒

常量¶

np.nan¶

np.isnan¶

np.count_nonzero¶

np.inf¶

np.pi¶

np.e¶

数据类型¶

常见数据类型¶

创建数据类型 np.dtype¶

数据类型信息 np.iinfo¶

时间日期和时间增量¶

np.datetime64¶

np.timedelta64时间运算¶

.astype() 转换¶

np.busday_offset 工作日¶

数组的创建 np.array¶

asarray()¶

fromfunction()¶

零数组 zeros() zeros_like()¶

1数组 ones() ones_like()¶

空数组 empty() empty_like()¶

单位数组 eye() identity()¶

对角数组 diag()¶

常数数组 full() full_like()¶

范围数组 arange() linspace() logspace() numpy.random.rand()¶

结构数组的创建¶

数组的属性¶

索引，切片，迭代 .copy()¶

副本与视图 .copy()¶

索引与切片¶

dots 索引¶

数组索引 np.take()¶

布尔索引¶

数组迭代 np.apply_along_axis¶

数组操作¶

更改形状¶

.shape¶

.flat¶

.flatten()¶

np.ravel()¶

np.reshape()¶

数组转置¶

np.transpose(a)¶

numpy.ndarray.T¶

更改维度¶

np.newaxis¶

np.squeeze(a)¶

数组组合¶

np.concatenate((a1, a2, ...))¶

np.stack([a1, a2], axis=0默认)¶

numpy.vstack()¶

numpy.hstack()¶

数组拆分¶

np.split()¶

np.vsplit()¶

np.hsplit()¶

数组平铺¶

np.tile()¶

np.repeat()¶

数组比较¶

np.allclose(A, B)¶

np.array_equal(A, B)¶

添加和删除元素¶

np.unique¶

逻辑函数*¶

np.all()¶

np.any()¶

np.isnan()¶

np.logical_not()¶

np.logical_and()¶

np.logical_or()¶

np.logical_xor()¶

np.greater()¶

np.greater_equal()¶

np.equal()¶

np.not_equal()¶

np.less()¶

np.less_equal()¶

np.isclose¶