集合与字典

添码座大约 7 分钟

set（集合）

# set: an unordered collection that never holds duplicate elements
# Creation, option 1: a set literal
set1 = {1, 2, 3, 4, 5}
# Creation, option 2: the set() constructor applied to an iterable
set1 = set([1, 2, 3, 4, 5])
print(set1)  # {1, 2, 3, 4, 5}
print(type(set1))  # <class 'set'>
# Sets cannot be indexed or sliced, but len() works
print(len(set1))  # 5
# Add elements
set1.add(9)
set1.add('a')
print(set1)  # {1, 2, 3, 4, 5, 9, 'a'} (display order may vary)
# Remove elements: discard() is silent when the element is absent,
# while remove() raises KeyError in that case
set1.discard(9)
print(set1)  # {1, 2, 3, 4, 5, 'a'}
set1.remove('a')
print(set1)  # {1, 2, 3, 4, 5}
# There is no positional access; a membership test is the way to query a set
print(1 in set1)  # True
print(10 not in set1)  # True
# Iterate over the elements
for i in set1:
    print(i)

set2 = {1, 2, 3, 4, 5, 6}
set3 = {2, 3, 7}
# Difference: elements of set2 that are not in set3
print(set2 - set3)  # {1, 4, 5, 6}
print(set2.difference(set3))  # {1, 4, 5, 6}
# Symmetric difference (not a complement): elements found in exactly one
# of the two sets, so the operand order does not matter
print(set2 ^ set3)  # {1, 4, 5, 6, 7}
print(set3 ^ set2)  # {1, 4, 5, 6, 7}
# Intersection: elements present in both set2 and set3
print(set2 & set3)  # {2, 3}
print(set2.intersection(set3))  # {2, 3}
# Union
print(set2 | set3)  # {1, 2, 3, 4, 5, 6, 7}
print(set2.union(set3))  # {1, 2, 3, 4, 5, 6, 7}
# update() produces the same union, but modifies set2 in place
set2.update(set3)
print(set2)  # {1, 2, 3, 4, 5, 6, 7}
# A bare {} is an empty dict, not an empty set
print(type({}))  # <class 'dict'>
# The correct way to create an empty set
print(type(set()))  # <class 'set'>
print(len(set()))  # 0
# Is set1 a subset of set2? Print the result; a bare expression statement
# would compute it and silently throw it away
print(set1.issubset(set2))  # True

# frozenset offers the read-only part of the set API: it is the immutable
# counterpart of set
set4 = frozenset(('a', 'b', 'c', 'd', 'e'))
# Alternatively, build it from any iterable such as a string;
# the duplicated 'e' collapses into a single element
set4 = frozenset('abcdee')
print(set4)  # frozenset({'b', 'e', 'd', 'a', 'c'}) (display order may vary)
print(type(set4))  # <class 'frozenset'>
# A frozenset has no add(), discard() or remove(), so elements can be neither
# added nor removed; that immutability is what makes it usable as a dict key
set5 = frozenset(range(1, 6))
print(set5)  # frozenset({1, 2, 3, 4, 5})

# Set comprehensions
dict_list = {"name": "lixingyun", "age": 18, "sex": "male"}
# Odd numbers below 10
set_list1 = {i for i in range(10) if i % 2 == 1}
# Only the values are needed, so iterate over values() instead of unpacking
# items() and ignoring the key
set_list2 = {v for v in dict_list.values()}
print(set_list1)  # {1, 3, 5, 7, 9}
print(set_list2)  # {'male', 18, 'lixingyun'} (display order may vary)

dict（字典）

from collections.abc import Mapping, MutableMapping

# dict instances are mappings; the dict class object itself is not an
# instance of Mapping / MutableMapping
print(isinstance(dict, MutableMapping))  # False
print(isinstance({}, MutableMapping))  # True
print(isinstance(dict, Mapping))  # False
print(isinstance({}, Mapping))  # True

# A dict is a collection as well, and a single dict may mix key types
dict1 = {
    "a": 1,
    "a": 2,
    "b": 2,
    "c": 3,
    10: 4,
}
print(type(dict1))  # <class 'dict'>
# A repeated key is collapsed: only the last value given for it survives
print(dict1)  # {'a': 2, 'b': 2, 'c': 3, 10: 4}
print(dict1["a"])  # 2
print(dict1[10])  # 4
# Keys must be immutable (hashable) objects,
# so a tuple is a valid key
dict2 = {
    (1, 2): {"name": "lixingyun"},
    "a": 2,
}
print(dict2)  # {(1, 2): {'name': 'lixingyun'}, 'a': 2}
print(dict2[(1, 2)])  # {'name': 'lixingyun'}

# Shallow copy: the outer dict is new, the nested dict is shared
dict3 = dict2.copy()
print(dict3)  # {(1, 2): {'name': 'lixingyun'}, 'a': 2}
# Change the nested dict through dict3
dict3[(1, 2)]["name"] = "wanglin"
print(dict3)  # {(1, 2): {'name': 'wanglin'}, 'a': 2}
# Because the copy was shallow, the change is visible through dict2 as well
print(dict2)  # {(1, 2): {'name': 'wanglin'}, 'a': 2}

# Deep copy: nested objects are copied too
import copy
dict4 = copy.deepcopy(dict2)
# Change the nested dict through dict4
dict4[(1, 2)]["name"] = "xiaoyan"
print(dict4)  # {(1, 2): {'name': 'xiaoyan'}, 'a': 2}
# dict2 is unaffected this time
print(dict2)  # {(1, 2): {'name': 'wanglin'}, 'a': 2}
# clear() empties dict2 in place
dict2.clear()
print(dict2)  # {}

data_list = ["lixingyun", "wanglin"]
# fromkeys() builds a dict in which every listed key maps to the same value
dict5 = dict.fromkeys(data_list, 9)
print(dict5)  # {'lixingyun': 9, 'wanglin': 9}
# get() hands back the supplied fallback instead of raising KeyError
print(dict5.get("xiaoyan", 10))  # 10
# Iterate over the key/value pairs
for key, value in dict5.items():
    print(key, value)

# setdefault() stores a value for a key that is not present yet
dict5.setdefault("xiaoyan", 11)
print(dict5)  # {'lixingyun': 9, 'wanglin': 9, 'xiaoyan': 11}
# For a key that does not exist yet, plain item assignment has the same effect
dict5["qinyu"] = 12
print(dict5)  # {'lixingyun': 9, 'wanglin': 9, 'xiaoyan': 11, 'qinyu': 12}
# ... and so does update() with a mapping
dict5.update({"lindong": 13})
print(dict5)  # {'lixingyun': 9, 'wanglin': 9, 'xiaoyan': 11, 'qinyu': 12, 'lindong': 13}
# update() also accepts several keyword arguments at once
dict5.update(a=1, b=2)
# ... or an iterable of (key, value) pairs
dict5.update([("c", 3), ("d", 4)])
print(dict5)  # {'lixingyun': 9, 'wanglin': 9, 'xiaoyan': 11, 'qinyu': 12, 'lindong': 13, 'a': 1, 'b': 2, 'c': 3, 'd': 4}
# popitem() is not random: it removes and returns the most recently
# inserted pair (last in, first out)
value = dict5.popitem()
print(value)  # ('d', 4)
value = dict5.popitem()
print(value)  # ('c', 3)
# del removes one specific key
del dict5["a"]
del dict5["b"]
del dict5["lindong"]
print(dict5)  # {'lixingyun': 9, 'wanglin': 9, 'xiaoyan': 11, 'qinyu': 12}

# A dict lookup can stand in for a switch/case statement
switcher1 = {
    1: "one",
    2: "two",
    3: "three",
    4: "four",
    5: "five",
}
print(switcher1[1])  # one


def switch(arg):
    """Return the word registered for arg in switcher1, or "nothing" if there is none."""
    fallback = "nothing"
    return switcher1.get(arg, fallback)


print(switch(0))  # nothing
# The same dispatch idea, with functions as the dict values
def func1():
    """Return the string "one"."""
    return "one"


def func2():
    """Return the string "two"."""
    return "two"


def func3():
    """Return the string "three"."""
    return "three"


def default():
    """Return the string "nothing"; used when no key matches."""
    return "nothing"


switcher2 = {1: func1, 2: func2, 3: func3}
# Key 0 is not registered, so the fallback function is selected and then called
func = switcher2.get(0, default)
print(func())  # nothing

names = {
    "a": 1,
    "b": 2,
    "c": 3,
    "d": 4,
    "e": 5,
    "f": 6,
    "g": 7,
    "h": 8,
    "i": 9,
    "j": 10,
}
# Dict comprehension: odd numbers below 10 mapped to their squares
dict6 = {i: i ** 2 for i in range(10) if i % 2 == 1}
print(dict6)  # {1: 1, 3: 9, 5: 25, 7: 49, 9: 81}
# The three expressions below only need the values of names, so they iterate
# over values() instead of unpacking items() and ignoring the key.
# List comprehension: the result is ordered
dict7 = [v * v for v in names.values() if v % 2 == 0]
print(dict7)  # [4, 16, 36, 64, 100]
# Set comprehension: the result is unordered
dict8 = {v * v for v in names.values() if v % 2 == 0}
print(dict8)  # {64, 100, 4, 36, 16} (display order may vary)
# Generator expression: the squares are produced lazily, on demand
dict9 = (v * v for v in names.values() if v % 2 == 0)
print(dict9)  # <generator object <genexpr> at 0x7fb5300cf660>
# Iterating consumes the generator; afterwards it is exhausted
for i in dict9:
    print(i)

OrderedDict（有序字典）

from collections import OrderedDict
# OrderedDict is a dict subclass, so every dict operation works on it too
user_dict = OrderedDict(
    [
        ('name', 'lixingyun'),
        ('age', 18),
        ('gender', 'male'),
    ]
)
# "Ordered" means: entries come back in the order they were inserted
print(user_dict)  # OrderedDict([('name', 'lixingyun'), ('age', 18), ('gender', 'male')])
# popitem() takes the last entry by default
print(user_dict.popitem(last=True))  # ('gender', 'male')
print(user_dict)  # OrderedDict([('name', 'lixingyun'), ('age', 18)])
# pop() removes the given key and returns its value
print(user_dict.pop("name"))  # lixingyun
print(user_dict)  # OrderedDict([('age', 18)])
# Re-inserted keys are appended after the existing ones
user_dict.update(gender='male', name='lixingyun')
print(user_dict)  # OrderedDict([('age', 18), ('gender', 'male'), ('name', 'lixingyun')])
# move_to_end() relocates an existing key to the last position
user_dict.move_to_end('age')
print(user_dict)  # OrderedDict([('gender', 'male'), ('name', 'lixingyun'), ('age', 18)])

自定义字典

# Pitfall: avoid subclassing the built-in list and dict directly
class MyDict1(dict):
    """dict subclass whose item assignment stores twice the given value."""

    def __setitem__(self, key, value):
        doubled = value * 2
        super().__setitem__(key, doubled)


# The constructor does not go through the overridden __setitem__,
# so the value is stored unchanged
my_dict1 = MyDict1(lixingyun=1)
print(my_dict1)  # {'lixingyun': 1}
# Item assignment does go through the overridden __setitem__
my_dict1["wanglin"] = 2
print(my_dict1)  # {'lixingyun': 1, 'wanglin': 4}

# UserDict is the dict-like base class meant for subclassing
from collections import UserDict


class MyDict2(UserDict):
    """UserDict subclass whose item assignment stores twice the given value."""

    def __setitem__(self, key, value):
        doubled = value * 2
        super().__setitem__(key, doubled)


# Both the constructor and item assignment go through the overridden __setitem__
my_dict2 = MyDict2(lixingyun=1)
print(my_dict2)  # {'lixingyun': 2}
my_dict2["wanglin"] = 2
print(my_dict2)  # {'lixingyun': 2, 'wanglin': 4}

from collections import defaultdict
my_dict3 = {}
# get() returns the fallback for a missing key without inserting anything
print(my_dict3.get("lixingyun", 11))  # 11
print(my_dict3)  # {}
# setdefault(), in contrast, stores the key/value pair in the dict
my_dict4 = {}
my_dict4.setdefault("lixingyun", 19)
print(my_dict4)  # {'lixingyun': 19}
# Word-frequency counting, three ways
my_dict5 = {}
persons = ["lixingyun", "wanglin", "xiaoyan", "xiaoyan", "wanglin", "wanglin"]
# Way 1: explicit membership check before incrementing
for person in persons:
    if person not in my_dict5:
        my_dict5[person] = 0
    my_dict5[person] += 1
print(my_dict5)  # {'lixingyun': 1, 'wanglin': 3, 'xiaoyan': 2}
# Way 2: setdefault() initialises the counter on first sight
my_dict6 = {}
for person in persons:
    my_dict6.setdefault(person, 0)
    my_dict6[person] += 1
print(my_dict6)  # {'lixingyun': 1, 'wanglin': 3, 'xiaoyan': 2}
# Way 3: defaultdict(int) supplies 0 for a key that has not been seen yet.
# No placeholder {} is assigned first; it would be overwritten immediately.
my_dict7 = defaultdict(int)
for person in persons:
    my_dict7[person] += 1
print(my_dict7)  # defaultdict(<class 'int'>, {'lixingyun': 1, 'wanglin': 3, 'xiaoyan': 2})

# Use defaultdict to create a default dict-shaped record for missing keys
# (defaultdict implements this through the __missing__() special method)
def default_dict():
    """Return a new record with an empty name and an age of 0."""
    return dict(name="", age=0)


my_dict8 = defaultdict(default_dict)
# Looking up an unknown key calls the factory and stores its result
print(my_dict8["default_dict"])  # {'name': '', 'age': 0}

my_dict9 = defaultdict(dict)
# defaultdict.__missing__() steps in for the unknown key, so no KeyError is raised
print(my_dict9["lixingyun"])  # {}
print(my_dict9)  # defaultdict(<class 'dict'>, {'lixingyun': {}})

Counter（计数器）

# Counter is a dict subclass as well
from collections import Counter
persons = ["lixingyun", "wanglin", "xiaoyan", "xiaoyan", "wanglin", "wanglin"]
persons_counter = Counter(persons)
# The simplest way to count: pass any iterable; the printed form lists the
# elements from the highest count to the lowest
print(persons_counter)  # Counter({'wanglin': 3, 'xiaoyan': 2, 'lixingyun': 1})
# A string is counted character by character
str_counter = Counter('principle')
print(str_counter)  # Counter({'p': 2, 'i': 2, 'r': 1, 'n': 1, 'c': 1, 'l': 1, 'e': 1})
# update() adds further counts on top of the existing ones
str_counter.update('people')
print(str_counter)  # Counter({'p': 4, 'e': 3, 'i': 2, 'l': 2, 'r': 1, 'n': 1, 'c': 1, 'o': 1})
# Another Counter can be merged in the same way
str_counter.update(persons_counter)
print(str_counter)  # Counter({'p': 4, 'e': 3, 'wanglin': 3, 'i': 2, 'l': 2, 'xiaoyan': 2, 'r': 1, 'n': 1, 'c': 1, 'o': 1, 'lixingyun': 1})
# From a dict: the dict's values are taken over as they are
dict_counter = Counter({"name": "lixingyun", "age": 18})
print(dict_counter)  # Counter({'name': 'lixingyun', 'age': 18})
# From a tuple
tuple_counter = Counter((1, 2, 1, 3, 3, 2, 2, 2, 3))
print(tuple_counter)  # Counter({2: 4, 3: 3, 1: 2})
# From a set: duplicates are already gone, so every count is 1
set_counter = Counter({"lixingyun", "wanglin", "xiaoyan", "xiaoyan", "wanglin", "wanglin"})
print(set_counter)  # Counter({'wanglin': 1, 'xiaoyan': 1, 'lixingyun': 1}) (display order may vary)
# From a deque
from collections import deque
deque_counter = Counter(deque([1, 2, 1, 3, 3, 2, 2, 2, 3]))
print(deque_counter)  # Counter({2: 4, 3: 3, 1: 2})
# From a defaultdict
from collections import defaultdict
default_counter = Counter(defaultdict(int, {"name": "lixingyun", "age": 18}))
print(default_counter)  # Counter({'name': 'lixingyun', 'age': 18})
# most_common(n) lists the n elements with the highest counts (a "top N");
# with n=None it lists every element
print(persons_counter.most_common(2))  # [('wanglin', 3), ('xiaoyan', 2)]

ChainMap（链式字典）

# ChainMap groups several mappings into a single view. It is not a dict
# subclass: it derives from collections.abc.MutableMapping.
from collections import ChainMap
user_dict1 = {"name": "lixingyun", "address": 20}
user_dict2 = {"gender": "male", "address": "beijing"}
# Without ChainMap each dict would have to be traversed separately;
# chaining them allows a single traversal over all of them
chain_map = ChainMap(user_dict2, user_dict1)
for key, value in chain_map.items():
    print(key, "==>", value)

print(chain_map)  # ChainMap({'gender': 'male', 'address': 'beijing'}, {'name': 'lixingyun', 'address': 20})
# When several dicts share a key, the lookup returns the value from the dict
# that comes first in the argument list
print(chain_map["address"])  # beijing
# Add one more dict
new_chain_map = chain_map.new_child({"hobby": "swimming"})
# new_child() places the new dict at the front of the chain
print(new_chain_map)  # ChainMap({'hobby': 'swimming'}, {'gender': 'male', 'address': 'beijing'}, {'name': 'lixingyun', 'address': 20})
# .maps exposes the underlying dicts as a list
print(new_chain_map.maps)  # [{'hobby': 'swimming'}, {'gender': 'male', 'address': 'beijing'}, {'name': 'lixingyun', 'address': 20}]

感谢支持

更多内容，请移步《超级个体》。