python基础知识之索引与切片详解

来源：脚本之家  时间：2022-05-14 13:56:28

基本索引

In [4]: sentence = "You are a nice girl"

In [5]: L = sentence.split()

In [6]: L
Out[6]: ["You", "are", "a", "nice", "girl"]

# 从0开始索引
In [7]: L[2]
Out[7]: "a"

# 负数索引，从列表右侧开始计数
In [8]: L[-2]
Out[8]: "nice"

# -1表示列表最后一项
In [9]: L[-1]
Out[9]: "girl"

# 当正整数索引超出范围时
In [10]: L[100]
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-10-...> in <module>()
----> 1 L[100]

IndexError: list index out of range

# 当负整数索引超出范围时
In [11]: L[-100]
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-11-...> in <module>()
----> 1 L[-100]

IndexError: list index out of range

# slice 索引
In [193]: sl = slice(0,-1,1)

In [194]: L[sl]
Out[194]: ["You", "are", "a", "nice"]

In [199]: sl = slice(0,100)

In [200]: L[sl]
Out[200]: ["You", "are", "a", "nice", "girl"]

嵌套索引

In [14]: L = [[1,2,3],{"I":"You are a nice girl","She":"Thank you!"},(11,22),"My name is Kyles"]

In [15]: L
Out[15]:
[[1, 2, 3],
 {"I": "You are a nice girl", "She": "Thank you!"},
 (11, 22),
 "My name is Kyles"]

# 索引第1项，索引为0
In [16]: L[0]
Out[16]: [1, 2, 3]

# 索引第1项的第2子项
In [17]: L[0][1]
Out[17]: 2

# 索引第2项词典
In [18]: L[1]
Out[18]: {"I": "You are a nice girl", "She": "Thank you!"}

# 索引第2项词典的 “She”
In [19]: L[1]["She"]
Out[19]: "Thank you!"

# 索引第3项
In [20]: L[2]
Out[20]: (11, 22)

# 索引第3项（元组）的第1个元素
In [22]: L[2][0]
Out[22]: 11

# 索引第4项
In [23]: L[3]
Out[23]: "My name is Kyles"

# 索引第4项，前3个字符
In [24]: L[3][:3]
Out[24]: "My "

切片

# 注意：这里的 L 是“基本索引”一节中 sentence.split() 得到的列表
# 切片选择,从1到列表末尾
In [13]: L[1:]
Out[13]: ["are", "a", "nice", "girl"]

# 负数索引，选取列表后两项
In [28]: L[-2:]
Out[28]: ["nice", "girl"]

# 异常测试,这里没有报错！
In [29]: L[-100:]
Out[29]: ["You", "are", "a", "nice", "girl"]

# 返回空
In [30]: L[-100:-200]
Out[30]: []

# 正向索引
In [32]: L[-100:3]
Out[32]: ["You", "are", "a"]

# 返回空
In [33]: L[-1:3]
Out[33]: []

# 返回空
In [41]: L[0:0]
Out[41]: []

索引看似简单，有的人不以为然。我们这里使用的是确定的数字索引，出了错很容易排查；若索引是经过计算得出的变量，就千万要小心了，否则失之毫厘，谬以千里。

numpy.array 索引一维

In [34]: import numpy as np

In [35]: arr = np.arange(10)

In [36]: arr
Out[36]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [40]: arr.shape
Out[40]: (10,)

# [0,1)
In [37]: arr[0:1]
Out[37]: array([0])

# [0,0)
In [38]: arr[0:0]
Out[38]: array([], dtype=int32)

# 右侧超出范围之后
In [42]: arr[:1000]
Out[42]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

# 左右两侧都超出范围之后
In [43]: arr[-100:1000]
Out[43]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

# 起止位置都超出右边界，返回空
In [44]: arr[100:101]
Out[44]: array([], dtype=int32)

# 左侧超出范围，右端为 -2
In [45]: arr[-100:-2]
Out[45]: array([0, 1, 2, 3, 4, 5, 6, 7])

# 起止位置都超出左边界，返回空
In [46]: arr[-100:-50]
Out[46]: array([], dtype=int32)

numpy.array 索引二维

In [49]: arr = np.arange(15).reshape(3,5)

In [50]: arr
Out[50]:
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [51]: arr.shape
Out[51]: (3, 5)

# axis = 0 增长的方向
In [52]: arr[0]
Out[52]: array([0, 1, 2, 3, 4])

# 选取第2行
In [53]: arr[1]
Out[53]: array([5, 6, 7, 8, 9])

# axis = 1 增长的方向，选取每一行的第1列
In [54]: arr[:,0]
Out[54]: array([ 0,  5, 10])

# axis = 1 增长的方向，选取每一行的第2列
In [55]: arr[:,1]
Out[55]: array([ 1,  6, 11])


# 选取每一行的第1,2列
In [56]: arr[:,0:2]
Out[56]:
array([[ 0,  1],
       [ 5,  6],
       [10, 11]])

# 右侧超出范围之后
In [57]: arr[:,0:100]
Out[57]:
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

# 左侧超出范围之后
In [62]: arr[:,-10:2]
Out[62]:
array([[ 0,  1],
       [ 5,  6],
       [10, 11]])

# []
In [58]: arr[:,0:0]
Out[58]: array([], shape=(3, 0), dtype=int32)

# []
In [59]: arr[0:0,0:1]
Out[59]: array([], shape=(0, 1), dtype=int32)

# 异常
In [63]: arr[:,-10]
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-63-...> in <module>()
----> 1 arr[:,-10]

IndexError: index -10 is out of bounds for axis 1 with size 5

numpy.array 索引三维…N维

In [67]: import numpy as np

In [68]: arr = np.arange(30).reshape(2,3,5)

In [69]: arr
Out[69]:
array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14]],

       [[15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24],
        [25, 26, 27, 28, 29]]])

# 根据 axis = 0 选取
In [70]: arr[0]
Out[70]:
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [71]: arr[1]
Out[71]:
array([[15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24],
       [25, 26, 27, 28, 29]])

# 根据 axis = 1 选取
In [72]: arr[:,0]
Out[72]:
array([[ 0,  1,  2,  3,  4],
       [15, 16, 17, 18, 19]])

In [73]: arr[:,1]
Out[73]:
array([[ 5,  6,  7,  8,  9],
       [20, 21, 22, 23, 24]])

# 异常指出 axis = 1 超出范围
In [74]: arr[:,4]
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-74-...> in <module>()
----> 1 arr[:,4]

IndexError: index 4 is out of bounds for axis 1 with size 3

# 根据 axis = 2 选取
In [75]: arr[:,:,0]
Out[75]:
array([[ 0,  5, 10],
       [15, 20, 25]])

# 降维
In [76]: arr[:,:,0].shape
Out[76]: (2, 3)

In [78]: arr[:,:,0:2]
Out[78]:
array([[[ 0,  1],
        [ 5,  6],
        [10, 11]],

       [[15, 16],
        [20, 21],
        [25, 26]]])

In [79]: arr[:,:,0:2].shape
Out[79]: (2, 3, 2)

# 右侧超出范围
In [81]: arr[:,:,0:100]
Out[81]:
array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14]],

       [[15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24],
        [25, 26, 27, 28, 29]]])

# 异常 axis = 0
In [82]: arr[100,:,0:100]
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-82-...> in <module>()
----> 1 arr[100,:,0:100]

IndexError: index 100 is out of bounds for axis 0 with size 2

pandas Series 索引

# 需先导入 pandas：import pandas as pd
In [84]: s = pd.Series(["You","are","a","nice","girl"])

In [85]: s
Out[85]:
0     You
1     are
2       a
3    nice
4    girl
dtype: object

# 按照索引选择
In [86]: s[0]
Out[86]: "You"

# 返回空 Series
In [87]: s[0:0]
Out[87]: Series([], dtype: object)

In [88]: s[0:-1]
Out[88]:
0     You
1     are
2       a
3    nice
dtype: object

# 易错点：ix 切片是闭区间，两端都包含
In [91]: s.ix[0:0]
Out[91]:
0    You
dtype: object

In [92]: s.ix[0:1]
Out[92]:
0    You
1    are
dtype: object

# ix 索引不存在的 index
In [95]: s.ix[400]
KeyError: 400

# 按照从0开始的位置索引
In [95]: s.iloc[0]
Out[95]: "You"

In [96]: s.iloc[1]
Out[96]: "are"

In [97]: s.iloc[100]
IndexError: single positional indexer is out-of-bounds

In [98]: s = pd.Series(["You","are","a","nice","girl"], index=list("abcde"))

In [99]: s
Out[99]:
a     You
b     are
c       a
d    nice
e    girl
dtype: object

In [100]: s.iloc[0]
Out[100]: "You"

In [101]: s.iloc[1]
Out[101]: "are"

# 按照 label 索引
In [103]: s.loc["a"]
Out[103]: "You"

In [104]: s.loc["b"]
Out[104]: "are"

In [105]: s.loc[["b","a"]]
Out[105]:
b    are
a    You
dtype: object

# loc 切片索引（闭区间，两端都包含）
In [106]: s.loc["a":"c"]
Out[106]:
a    You
b    are
c      a
dtype: object

In [108]: s.index
Out[108]: Index(["a", "b", "c", "d", "e"], dtype="object")

pandas DataFrame 索引

In [114]: import pandas as pd

In [115]: df = pd.DataFrame({"open":[1,2,3],"high":[4,5,6],"low":[6,3,1]}, index=pd.period_range("30/12/2017",periods=3,freq="H"))

In [116]: df
Out[116]:
                  high  low  open
2017-12-30 00:00     4    6     1
2017-12-30 01:00     5    3     2
2017-12-30 02:00     6    1     3

# 按列索引
In [117]: df["high"]
Out[117]:
2017-12-30 00:00    4
2017-12-30 01:00    5
2017-12-30 02:00    6
Freq: H, Name: high, dtype: int64

In [118]: df.high
Out[118]:
2017-12-30 00:00    4
2017-12-30 01:00    5
2017-12-30 02:00    6
Freq: H, Name: high, dtype: int64

In [120]: df[["high","open"]]
Out[120]:
                  high  open
2017-12-30 00:00     4     1
2017-12-30 01:00     5     2
2017-12-30 02:00     6     3

In [122]: df.ix[:]
D:\CodeTool\Python\Python36\Scripts\ipython:1: DeprecationWarning:
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

In [123]: df.iloc[0:0]
Out[123]:
Empty DataFrame
Columns: [high, low, open]
Index: []

In [124]: df.ix[0:0]
Out[124]:
Empty DataFrame
Columns: [high, low, open]
Index: []

# 按照 label 索引
In [127]: df.index
Out[127]: PeriodIndex(["2017-12-30 00:00", "2017-12-30 01:00", "2017-12-30 02:00"], dtype="period[H]", freq="H")

In [128]: df.loc["2017-12-30 00:00"]
Out[128]:
high    4
low     6
open    1
Name: 2017-12-30 00:00, dtype: int64

# 检查参数：00:00:11 落在 00:00 这一小时周期内，可以匹配；00:00:66 不是合法时间，抛出 KeyError
In [155]: df.loc["2017-12-30 00:00:11"]
Out[155]:
high    4
low     6
open    1
Name: 2017-12-30 00:00, dtype: int64

In [156]: df.loc["2017-12-30 00:00:66"]
KeyError: "the label [2017-12-30 00:00:66] is not in the [index]"

填坑

In [158]: df = pd.DataFrame({"a":[1,2,3],"b":[4,5,6]}, index=[2,3,4])

In [159]: df
Out[159]:
   a  b
2  1  4
3  2  5
4  3  6

# iloc 取第一行正确用法
In [160]: df.iloc[0]
Out[160]:
a    1
b    4
Name: 2, dtype: int64

# loc 正确用法
In [165]: df.loc[[2,3]]
Out[165]:
   a  b
2  1  4
3  2  5

# 注意此处 index 是什么类型
In [167]: df.loc["2"]
KeyError: "the label [2] is not in the [index]"

# 索引 Int64Index
In [172]: df.index
Out[172]: Int64Index([2, 3, 4], dtype="int64")

# 索引为字符串
In [168]: df = pd.DataFrame({"a":[1,2,3],"b":[4,5,6]}, index=list("234"))

In [169]: df
Out[169]:
   a  b
2  1  4
3  2  5
4  3  6

In [170]: df.index
Out[170]: Index(["2", "3", "4"], dtype="object")

# 此处没有报错，千万注意 index 类型
In [176]: df.loc["2"]
Out[176]:
a    1
b    4
Name: 2, dtype: int64

# ix 是一个功能强大的索引器，但是争议却很大，往往是错误之源
# （.ix 已在 pandas 1.0 中移除，新代码请使用 .loc 或 .iloc）
# 咦，怎么输出与预想不一致！此时 index 是字符串，整数 2 被 ix 当作位置索引，取到的是第3行
In [177]: df.ix[2]
D:\CodeTool\Python\Python36\Scripts\ipython:1: DeprecationWarning:
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
Out[177]:
a    3
b    6
Name: 4, dtype: int64

# 注意开闭区间：loc 的标签切片两端都包含
In [180]: df.loc["2":"3"]
Out[180]:
   a  b
2  1  4
3  2  5