集合与字典
大约 7 分钟
set(集合)
# A set is an unordered collection whose elements are unique.
# Initialization, option 1: a set literal.
set1 = {1, 2, 3, 4, 5}
# Initialization, option 2: build the set from any iterable.
set1 = set([1, 2, 3, 4, 5])
print(set1)  # {1, 2, 3, 4, 5}
print(type(set1))  # <class 'set'>
# Sets support neither indexing nor slicing, but len() works.
print(len(set1))  # 5
# Add elements.
set1.add(9)
set1.add('a')
print(set1)  # {1, 2, 3, 4, 5, 9, 'a'} (display order is not guaranteed)
# Remove elements: discard() silently ignores a missing element,
# while remove() raises KeyError if the element is absent.
set1.discard(9)
print(set1)  # {1, 2, 3, 4, 5, 'a'}
set1.remove('a')
print(set1)  # {1, 2, 3, 4, 5}
# Elements cannot be read by position; only membership can be tested.
print(1 in set1)  # True
print(10 not in set1)  # True
# Iterate over the elements (the loop body must be indented).
for i in set1:
    print(i)
set2 = {1, 2, 3, 4, 5, 6}
set3 = {2, 3, 7}
# Difference: elements that are in set2 but not in set3.
print(set2 - set3) # {1, 4, 5, 6}
print(set2.difference(set3)) # {1, 4, 5, 6}
# Symmetric difference (this is not a complement): elements that belong to
# exactly one of the two sets; the operator is commutative.
print(set2 ^ set3) # {1, 4, 5, 6, 7}
print(set3 ^ set2) # {1, 4, 5, 6, 7}
# Intersection: elements that are in both set2 and set3.
print(set2 & set3) # {2, 3}
print(set2.intersection(set3)) # {2, 3}
# Union: elements that are in either set.
print(set2 | set3) # {1, 2, 3, 4, 5, 6, 7}
print(set2.union(set3)) # {1, 2, 3, 4, 5, 6, 7}
# update() also computes the union, but modifies set2 in place.
set2.update(set3)
print(set2) # {1, 2, 3, 4, 5, 6, 7}
# An empty pair of braces {} is a dict, not a set.
print(type({})) # <class 'dict'>
# The correct way to create an empty set is set().
print(type(set())) # <class 'set'>
print(len(set())) # 0
# Is set1 a subset of set2?
# Print the result: a bare expression statement would silently discard it.
print(set1.issubset(set2))  # True
# frozenset is the immutable counterpart of set: same elements-are-unique
# semantics, but it cannot be changed after creation.
set4 = frozenset(['a', 'b', 'c', 'd', 'e'])
# Or build it from any other iterable, e.g. a string (the duplicate 'e' collapses).
set4 = frozenset('abcdee')
print(set4) # frozenset({'b', 'e', 'd', 'a', 'c'}) (display order may vary)
print(type(set4)) # <class 'frozenset'>
# A frozenset can neither gain nor lose elements: it has no add(),
# discard() or remove() methods.
# Being immutable makes it hashable, so it can be used as a dict key.
set5 = frozenset([1, 2, 3, 4, 5])
print(set5) # frozenset({1, 2, 3, 4, 5})
# Set comprehensions.
dict_list = {"name": "lixingyun", "age": 18, "sex": "male"}
# Odd numbers below 10.
set_list1 = {i for i in range(10) if i % 2 == 1}
# Only the values are needed, so iterate values() instead of unpacking
# items() into a key that is never used.
set_list2 = {v for v in dict_list.values()}
print(set_list1)  # {1, 3, 5, 7, 9}
print(set_list2)  # {'male', 18, 'lixingyun'} (display order may vary)
dict(字典)
from collections.abc import Mapping, MutableMapping
# dict is a mapping type: dict instances are Mapping (and MutableMapping) objects.
# isinstance() is False for the class object `dict` itself, because the class
# is not an instance of the ABC; only an actual dict such as {} is.
print(isinstance(dict, MutableMapping)) # False
print(isinstance({}, MutableMapping)) # True
print(isinstance(dict, Mapping)) # False
print(isinstance({}, Mapping)) # True
# A dict is also a collection type; keys of different data types can be mixed.
dict1 = {"a" : 1, "a" : 2, "b" : 2, "c" : 3, 10 : 4}
print(type(dict1)) # <class 'dict'>
# Duplicate keys collapse: only the last value given for a key is kept.
print(dict1) # {'a': 2, 'b': 2, 'c': 3, 10: 4}
print(dict1["a"]) # 2
print(dict1[10]) # 4
# Keys must be hashable (immutable) objects.
# A tuple can therefore be used as a key.
dict2 = {(1, 2) : {"name" : "lixingyun"}, "a" : 2}
print(dict2) # {(1, 2): {'name': 'lixingyun'}, 'a': 2}
print(dict2[(1, 2)]) # {'name': 'lixingyun'}
# Shallow copy: the new dict shares the nested value objects with dict2.
dict3 = dict2.copy()
print(dict3) # {(1, 2): {'name': 'lixingyun'}, 'a': 2}
# Mutate the nested dict through dict3.
dict3[(1, 2)]["name"] = "wanglin"
print(dict3) # {(1, 2): {'name': 'wanglin'}, 'a': 2}
# Because the copy was shallow, the change is visible through dict2 as well.
print(dict2) # {(1, 2): {'name': 'wanglin'}, 'a': 2}
# Deep copy: nested objects are copied recursively.
import copy
dict4 = copy.deepcopy(dict2)
# Mutate the nested dict through dict4.
dict4[(1, 2)]["name"] = "xiaoyan"
print(dict4) # {(1, 2): {'name': 'xiaoyan'}, 'a': 2}
# dict2 is unaffected this time.
print(dict2) # {(1, 2): {'name': 'wanglin'}, 'a': 2}
# clear() removes every item in place.
dict2.clear()
print(dict2) # {}
data_list = ["lixingyun", "wanglin"]
# fromkeys() builds a dict whose keys all share the same initial value.
dict5 = dict.fromkeys(data_list, 9)
print(dict5)  # {'lixingyun': 9, 'wanglin': 9}
# get() never raises KeyError; it returns the supplied default instead.
print(dict5.get("xiaoyan", 10))  # 10
# Iterate over key/value pairs (the loop body must be indented).
for key, value in dict5.items():
    print(key, value)
# setdefault() inserts the key only if it is not present yet.
dict5.setdefault("xiaoyan", 11)
print(dict5)  # {'lixingyun': 9, 'wanglin': 9, 'xiaoyan': 11}
# For a key that does not exist yet, plain assignment has the same effect
# (unlike setdefault(), assignment would overwrite an existing value).
dict5["qinyu"] = 12
print(dict5)  # {'lixingyun': 9, 'wanglin': 9, 'xiaoyan': 11, 'qinyu': 12}
# Or merge another mapping with update().
dict5.update({"lindong": 13})
print(dict5)  # {'lixingyun': 9, 'wanglin': 9, 'xiaoyan': 11, 'qinyu': 12, 'lindong': 13}
# update() also accepts several keyword arguments at once ...
dict5.update(a=1, b=2)
# ... or an iterable of (key, value) pairs.
dict5.update([("c", 3), ("d", 4)])
print(dict5)  # {'lixingyun': 9, 'wanglin': 9, 'xiaoyan': 11, 'qinyu': 12, 'lindong': 13, 'a': 1, 'b': 2, 'c': 3, 'd': 4}
# popitem() is not random: it removes and returns the most recently
# inserted pair (LIFO order, guaranteed since Python 3.7).
value = dict5.popitem()
print(value)  # ('d', 4)
value = dict5.popitem()
print(value)  # ('c', 3)
# del removes a specific key (KeyError if the key is missing).
del dict5["a"]
del dict5["b"]
del dict5["lindong"]
print(dict5)  # {'lixingyun': 9, 'wanglin': 9, 'xiaoyan': 11, 'qinyu': 12}
# Use a dict in place of a switch/case statement.
switcher1 = {
    1: "one",
    2: "two",
    3: "three",
    4: "four",
    5: "five",
}
print(switcher1[1])  # one


def switch(arg) -> str:
    """Return the word mapped to ``arg`` in ``switcher1``.

    Falls back to the string "nothing" when ``arg`` is not a key, which
    plays the role of the ``default`` branch of a switch statement.
    """
    return switcher1.get(arg, "nothing")


print(switch(0))  # nothing
def func1():
    """Handler for case 1."""
    return "one"


def func2():
    """Handler for case 2."""
    return "two"


def func3():
    """Handler for case 3."""
    return "three"


def default():
    """Fallback handler used when no case matches."""
    return "nothing"


# Dispatch table: the values are the functions themselves (not their
# results), so only the selected handler is ever called.
switcher2 = {
    1: func1,
    2: func2,
    3: func3,
}
# Key 0 is not in the table, so get() returns the fallback handler.
func = switcher2.get(0, default)
print(func())  # nothing
names = {
    "a": 1,
    "b": 2,
    "c": 3,
    "d": 4,
    "e": 5,
    "f": 6,
    "g": 7,
    "h": 8,
    "i": 9,
    "j": 10,
}
# Dict comprehension: odd numbers below 10 mapped to their squares.
dict6 = {i: i ** 2 for i in range(10) if i % 2 == 1}
print(dict6)  # {1: 1, 3: 9, 5: 25, 7: 49, 9: 81}
# List comprehension: the result is a list that keeps the dict's order.
# Only the values are used, so iterate values() rather than items().
dict7 = [v * v for v in names.values() if v % 2 == 0]
print(dict7)  # [4, 16, 36, 64, 100]
# Set comprehension: the result is a set, so display order is not guaranteed.
dict8 = {v * v for v in names.values() if v % 2 == 0}
print(dict8)  # {64, 100, 4, 36, 16}
# Generator expression: values are produced lazily, one at a time.
dict9 = (v * v for v in names.values() if v % 2 == 0)
print(dict9)  # <generator object <genexpr> at 0x7fb5300cf660>
# Iterating consumes the generator; afterwards it is exhausted.
for i in dict9:
    print(i)
OrderedDict(有序字典)
from collections import OrderedDict
# OrderedDict is a subclass of dict, so every dict operation works on it too.
user_dict = OrderedDict()
user_dict['name'] = 'lixingyun'
user_dict['age'] = 18
user_dict['gender'] = 'male'
# "Ordered" means that items come out in the order in which they were inserted.
print(user_dict) # OrderedDict([('name', 'lixingyun'), ('age', 18), ('gender', 'male')])
# popitem() removes and returns the last item by default.
print(user_dict.popitem()) # ('gender', 'male')
print(user_dict) # OrderedDict([('name', 'lixingyun'), ('age', 18)])
# pop(key) removes the given key and returns its value.
print(user_dict.pop("name")) # lixingyun
print(user_dict) # OrderedDict([('age', 18)])
# Re-inserted keys are appended at the end.
user_dict['gender'] = 'male'
user_dict['name'] = 'lixingyun'
print(user_dict) # OrderedDict([('age', 18), ('gender', 'male'), ('name', 'lixingyun')])
# move_to_end() moves an existing key to the last position.
user_dict.move_to_end('age')
print(user_dict) # OrderedDict([('gender', 'male'), ('name', 'lixingyun'), ('age', 18)])
自定义字典
# Avoid subclassing list and dict directly: the built-in's own code paths
# (for example the constructor) bypass overridden methods.
class MyDict1(dict):
    """dict subclass that tries to store every value doubled."""

    def __setitem__(self, key, value):
        """Store ``value * 2`` under ``key``."""
        super().__setitem__(key, value * 2)


# The dict constructor does NOT go through the overridden __setitem__,
# so the value is stored as given.
my_dict1 = MyDict1(lixingyun=1)
print(my_dict1)  # {'lixingyun': 1}
# Item assignment DOES call the overridden __setitem__, so the value is doubled.
my_dict1["wanglin"] = 2
print(my_dict1)  # {'lixingyun': 1, 'wanglin': 4}
# UserDict is the dict-like class designed to be subclassed.
from collections import UserDict


class MyDict2(UserDict):
    """UserDict subclass that stores every value doubled."""

    def __setitem__(self, key, value):
        """Store ``value * 2`` under ``key``."""
        super().__setitem__(key, value * 2)


# Both the constructor and item assignment go through the overridden
# __setitem__, so every value is doubled.
my_dict2 = MyDict2(lixingyun=1)
print(my_dict2)  # {'lixingyun': 2}
my_dict2["wanglin"] = 2
print(my_dict2)  # {'lixingyun': 2, 'wanglin': 4}
from collections import defaultdict
my_dict3 = {}
# get() returns the default when the key is missing, but it does not
# actually add the key/value pair to the dict.
print(my_dict3.get("lixingyun", 11)) # 11
print(my_dict3) # {}
# setdefault(), in contrast, stores the key/value pair permanently.
my_dict4 = {}
my_dict4.setdefault("lixingyun", 19)
print(my_dict4) # {'lixingyun': 19}
# Word-frequency counting, three ways.
my_dict5 = {}
persons = ["lixingyun", "wanglin", "xiaoyan", "xiaoyan", "wanglin", "wanglin"]
# 1. The traditional way: initialise the counter on first sight, then increment.
for person in persons:
    if person not in my_dict5:
        my_dict5[person] = 0
    my_dict5[person] += 1
print(my_dict5)  # {'lixingyun': 1, 'wanglin': 3, 'xiaoyan': 2}
# 2. With setdefault(): it inserts 0 only when the key is still missing.
my_dict6 = {}
for person in persons:
    my_dict6.setdefault(person, 0)
    my_dict6[person] += 1
print(my_dict6)  # {'lixingyun': 1, 'wanglin': 3, 'xiaoyan': 2}
# 3. With defaultdict: a missing key is created on access with int() == 0.
my_dict7 = defaultdict(int)
for person in persons:
    my_dict7[person] += 1
print(my_dict7)  # defaultdict(<class 'int'>, {'lixingyun': 1, 'wanglin': 3, 'xiaoyan': 2})
# Use defaultdict to create a default dict structure for missing keys.
# This works through the __missing__() special method.
def default_dict():
    """Return a fresh default record with an empty name and age 0."""
    return {
        "name": "",
        "age": 0,
    }


my_dict8 = defaultdict(default_dict)
# Looking up a missing key calls default_dict() and stores its result.
print(my_dict8["default_dict"])  # {'name': '', 'age': 0}
my_dict9 = defaultdict(dict)
# No KeyError is raised, because defaultdict.__missing__() supplies the value.
print(my_dict9["lixingyun"])  # {}
print(my_dict9)  # defaultdict(<class 'dict'>, {'lixingyun': {}})
Counter(计数器)
# Counter is also a subclass of dict.
from collections import Counter
persons = ["lixingyun", "wanglin", "xiaoyan", "xiaoyan", "wanglin", "wanglin"]
persons_counter = Counter(persons)
# Simplest way to count: pass any iterable. The printed form lists the
# entries from the most common to the least common.
print(persons_counter) # Counter({'wanglin': 3, 'xiaoyan': 2, 'lixingyun': 1})
# A string is counted character by character.
str_counter = Counter('principle')
print(str_counter) # Counter({'p': 2, 'i': 2, 'r': 1, 'n': 1, 'c': 1, 'l': 1, 'e': 1})
# update() adds further counts on top of the existing ones.
str_counter.update('people')
print(str_counter) # Counter({'p': 4, 'e': 3, 'i': 2, 'l': 2, 'r': 1, 'n': 1, 'c': 1, 'o': 1})
# Another Counter can be added in the same way.
str_counter.update(persons_counter)
print(str_counter) # Counter({'p': 4, 'e': 3, 'wanglin': 3, 'i': 2, 'l': 2, 'xiaoyan': 2, 'r': 1, 'n': 1, 'c': 1, 'o': 1, 'lixingyun': 1})
# Building from a dict: nothing is tallied; the dict's values are taken
# over as the counts as they are (here they are not even all numbers).
dict_counter = Counter({"name": "lixingyun", "age": 18})
print(dict_counter) # Counter({'name': 'lixingyun', 'age': 18})
# Counting the elements of a tuple.
tuple_counter = Counter((1, 2, 1, 3, 3, 2, 2, 2, 3))
print(tuple_counter) # Counter({2: 4, 3: 3, 1: 2})
# Counting a set: the set literal already collapses duplicates, so every
# element is counted once (display order may vary).
set_counter = Counter({"lixingyun", "wanglin", "xiaoyan", "xiaoyan", "wanglin", "wanglin"})
print(set_counter) # Counter({'wanglin': 1, 'xiaoyan': 1, 'lixingyun': 1})
# Counting the elements of a deque.
from collections import deque
deque_counter = Counter(deque([1, 2, 1, 3, 3, 2, 2, 2, 3]))
print(deque_counter) # Counter({2: 4, 3: 3, 1: 2})
# Building from a defaultdict behaves like building from a plain dict:
# the values are taken over as the counts.
from collections import defaultdict
default_counter = Counter(defaultdict(int, {"name": "lixingyun", "age": 18}))
print(default_counter) # Counter({'name': 'lixingyun', 'age': 18})
# most_common(n) lists the n most frequent elements (a "top N");
# with n=None it lists every element with its count.
print(persons_counter.most_common(2)) # [('wanglin', 3), ('xiaoyan', 2)]
ChainMap(链式字典)
# ChainMap groups several mappings into a single view. It is a
# MutableMapping, not a subclass of dict.
from collections import ChainMap
user_dict1 = {'name': 'lixingyun', 'address': 20}
user_dict2 = {'gender': 'male', 'address': 'beijing'}
# Without ChainMap each dict would have to be iterated separately;
# ChainMap chains them so that they can be traversed as one mapping.
chain_map = ChainMap(user_dict2, user_dict1)
for key, value in chain_map.items():
    print(key, "==>", value)
print(chain_map)  # ChainMap({'gender': 'male', 'address': 'beijing'}, {'name': 'lixingyun', 'address': 20})
# When several dicts share a key, lookup returns the value from the dict
# that appears earliest in the argument list.
print(chain_map['address'])  # beijing
# Add another dict to the chain.
new_chain_map = chain_map.new_child({'hobby': 'swimming'})
# new_child() returns a new ChainMap with the given dict placed at the front.
print(new_chain_map)  # ChainMap({'hobby': 'swimming'}, {'gender': 'male', 'address': 'beijing'}, {'name': 'lixingyun', 'address': 20})
# .maps exposes the underlying dicts as a list.
print(new_chain_map.maps)  # [{'hobby': 'swimming'}, {'gender': 'male', 'address': 'beijing'}, {'name': 'lixingyun', 'address': 20}]
感谢支持
更多内容,请移步《超级个体》。