将键映射到多个值当中
# Plain approach: spell out every key together with its list of values.
d = dict(
    a=[1, 2, 3],
    b=[4, 5, 6],
)
# Build a multi-valued dict with defaultdict: a missing key is created on
# first access by calling the factory, so no membership check is needed.
import collections

# The module is imported as ``collections``, so the class has to be spelled
# ``collections.defaultdict``; a bare ``defaultdict(list)`` raises NameError.
d = collections.defaultdict(list)
d["a"].append(1)
d["a"].append(2)
print(d)
# defaultdict(<class 'list'>, {'a': [1, 2]})
d["b"].append(3)
print(d)
# defaultdict(<class 'list'>, {'a': [1, 2], 'b': [3]})

# With ``set`` as the factory each key keeps only distinct values.
e = collections.defaultdict(set)
e["a"].add(3)
e["a"].add(4)
print(e)
# defaultdict(<class 'set'>, {'a': {3, 4}})
让字典保持有序
# OrderedDict iterates its keys in the order in which they were inserted.
from collections import OrderedDict

d = OrderedDict()
d['foo'] = 1
d['bar'] = 2
d['spam'] = 3
d['grok'] = 4
# The loop body must be indented; items() yields each key with its value.
for key, value in d.items():
    print(key, value)
# foo 1
# bar 2
# spam 3
# grok 4
OrderedDict 在构建 JSON 时比较有用,可以让输出的字段保持固定的顺序
# Serialising an OrderedDict with json.dumps keeps the insertion order of
# its keys in the resulting JSON text.
from collections import OrderedDict
import json

d = OrderedDict([
    ('foo', 1),
    ('bar', 2),
    ('spam', 3),
    ('grok', 4),
])
a = json.dumps(d)
print(a)
# {"foo": 1, "bar": 2, "spam": 3, "grok": 4}
OrderedDict 内部额外维护了一个双向链表来记录键的插入顺序,因此占用的内存比普通字典大。另外,从 Python 3.7 起,内置的 dict 也保证保持键的插入顺序。
与字典有关的计算问题
可以使用 zip() 把字典的值和键配对成 (值, 键) 元组,这样 min() 和 max() 就会按值进行比较,同时还能得到对应的键
# Pair every price with its ticker as (value, key) so that min()/max()
# compare by price and still report which ticker the price belongs to.
prices = dict(
    ACME=45.23,
    AAPL=612.78,
    IBM=205.55,
    HPQ=37.20,
)

# A fresh zip() is built for each call because a zip iterator is exhausted
# after a single pass.
min_price = min(zip(prices.values(), prices.keys()))
print(min_price)

max_price = max(zip(prices.values(), prices.keys()))
print(max_price)
# (37.2, 'HPQ')
# (612.78, 'AAPL')
请注意,zip() 创建的是一个迭代器,它的内容只能被消费一次;如果要对同一组数据多次计算(例如先求 min 再求 max),需要重新调用 zip()
在两个字典中寻找相同点
可以对 keys() 或者 items() 方法返回的视图执行常见的集合操作(交集、差集等),从而找出两个字典的相同点和不同点
# Key views and item views of a dict support set operators directly.
a = dict(x=1, y=2, z=3)
b = dict(w=10, x=11, y=2)

common_keys = a.keys() & b.keys()      # intersection of the keys
only_in_a = a.keys() - b.keys()        # keys of a that b does not have
common_items = a.items() & b.items()   # intersection of the (key, value) pairs

print(common_keys)
print(only_in_a)
print(common_items)
# {'y', 'x'}
# {'z'}
# {('y', 2)}
values() 返回的视图不支持集合操作,因为并不能保证所有的值都是唯一的;如果确实需要,可以先把值转换成 set
从序列中移除重复项且保持元素间顺序不变
如果序列中的元素是可哈希的,那么可以使用以下方法
def dedupe(items, key=None):
    """Yield the elements of *items* in their original order, skipping repeats.

    Args:
        items: Iterable whose elements are filtered.
        key: Optional callable that maps an element to a hashable value used
            to detect repeats. When omitted the elements themselves are used
            and therefore have to be hashable.

    Yields:
        Every element whose (keyed) value has not been produced before.
    """
    seen = set()
    for item in items:
        marker = item if key is None else key(item)
        if marker not in seen:
            yield item
            seen.add(marker)
a = [4, 34, 2, 3, 3, 5, 5, 6, 6]
# Iterating the generator prints each distinct value once, in input order.
for i in dedupe(a):
    print(i, end=" ")
# 4 34 2 3 5 6
a = [4, 34, 2, 3, 3, 5, 5, 6, 6]
# Building a set drops the duplicates but does not keep the original order.
b = set()
b.update(a)
print(b)
# {34, 3, 2, 4, 5, 6}
对切片命名
可以使用内置的 slice() 函数创建切片对象,并给它起一个有意义的名字
# A slice object can be stored under a name and used wherever a slice is allowed.
items = list(range(7))
a = slice(2, 4)
print(items[a])
# [2, 3]

items = list(range(7))
a = slice(2, 4)
print(items[a])
# Print the attributes of the slice object: start, stop and step.
for bound in (a.start, a.stop, a.step):
    print(bound)
# [2, 3]
# 2
# 4
# None
使用切片对象的 indices(size) 方法,可以将切片映射到特定大小的序列上
items = list(range(7))
a = slice(2, 4)
print(a)
# indices(size) returns a (start, stop, step) tuple for a sequence of that
# length.
size = len(items)
tup = a.indices(size)
print(tup)
# slice(2, 4, None)
# (2, 4, 1)
找出序列中出现次数最多的元素
from collections import Counter

words = [
    'look', 'into', 'my', 'eyes',
    'look', 'into', 'my', 'eyes',
    'the', 'eyes', 'the', 'eyes',
]
# Tally how often each word occurs.
coun_word = Counter()
coun_word.update(words)
# Take the three most frequent entries.
top_three = coun_word.most_common(3)
print(top_three)
# [('eyes', 4), ('look', 2), ('into', 2)]
from collections import Counter

words = [
    'look', 'into', 'my', 'eyes',
    'look', 'into', 'my', 'eyes',
    'the', 'eyes', 'the', 'eyes',
]
word_2 = ['look', 'into', 'my', 'eyes']

coun_word = Counter(words)
coun_word2 = Counter(word_2)
for tally in (coun_word, coun_word2):
    print(tally)

# Counters support arithmetic: subtraction and addition combine the counts
# key by key.
difference = coun_word - coun_word2
total = coun_word + coun_word2
print(difference)
print(total)
# Counter({'eyes': 4, 'look': 2, 'into': 2, 'my': 2, 'the': 2})
# Counter({'look': 1, 'into': 1, 'my': 1, 'eyes': 1})
# Counter({'eyes': 3, 'the': 2, 'look': 1, 'into': 1, 'my': 1})
# Counter({'eyes': 5, 'look': 3, 'into': 3, 'my': 3, 'the': 2})
对不原生支持比较操作的对象排序
sorted() 函数可以接受一个 key 参数,用来传入一个可调用对象;该可调用对象会从每个待排序的对象中返回一个用于比较的值,sorted() 则利用这些值来进行排序。
内置函数 max() 和 min() 同样可以使用 key 参数来提取比较用的值。
import operator
class User:
    """Minimal record type used to demonstrate sorting objects by an attribute."""

    # Class-level default; __init__ overrides it on every instance.
    id = 0

    def __init__(self, id):
        """Store the identifier of the user.

        Args:
            id: Value kept on the instance as ``self.id``.
        """
        self.id = id

    def __repr__(self):
        """Return a constructor-like representation for debugging."""
        return f"User({self.id!r})"

    def print_user(self):
        """Print the id followed by a space instead of a newline."""
        print(self.id, end=" ")
a = [User(4), User(3), User(6), User(7)]
# Sort with a lambda key that extracts the id of each user.
b = sorted(a, key=lambda u: u.id)
# Sort with attrgetter; it also accepts several attribute names at once.
c = sorted(a, key=operator.attrgetter('id'))
for i in b:
    i.print_user()
# End the first output line before printing the second result.
print()
for j in c:
    j.print_user()
# 3 4 6 7
# 3 4 6 7
根据字段将记录分组
from operator import itemgetter
from itertools import groupby
rows = [
    {'address': '5412 N CLARK', 'date': '07/01/2012'},
    {'address': '5413 N CLARK', 'date': '07/04/2012'},
    {'address': '5414 N CLARK', 'date': '07/02/2012'},
    {'address': '5415 N CLARK', 'date': '07/03/2012'},
    {'address': '5416 N CLARK', 'date': '07/02/2012'},
    {'address': '5417 N CLARK', 'date': '07/02/2012'},
    {'address': '5418 N CLARK', 'date': '07/01/2012'},
    {'address': '5419 N CLARK', 'date': '07/04/2012'},
]
# groupby only merges consecutive items with the same key, so the rows are
# sorted by that key first.
rows.sort(key=itemgetter('date'))
# Each step yields the key value and a sub-iterator over that group's rows.
for date, items in groupby(rows, key=itemgetter('date')):
    # A plain print keeps the output identical to the listing below (no
    # blank line between the date and its rows).
    print(date)
    for i in items:
        print(i)
# 07/01/2012
# {'address': '5412 N CLARK', 'date': '07/01/2012'}
# {'address': '5418 N CLARK', 'date': '07/01/2012'}
# 07/02/2012
# {'address': '5414 N CLARK', 'date': '07/02/2012'}
# {'address': '5416 N CLARK', 'date': '07/02/2012'}
# {'address': '5417 N CLARK', 'date': '07/02/2012'}
# 07/03/2012
# {'address': '5415 N CLARK', 'date': '07/03/2012'}
# 07/04/2012
# {'address': '5413 N CLARK', 'date': '07/04/2012'}
# {'address': '5419 N CLARK', 'date': '07/04/2012'}
如果只是简单地进行分组,那么可以放进一个大的数据结构
from collections import defaultdict
rows = [
    {'address': '5412 N CLARK', 'date': '07/01/2012'},
    {'address': '5413 N CLARK', 'date': '07/04/2012'},
    {'address': '5414 N CLARK', 'date': '07/02/2012'},
    {'address': '5415 N CLARK', 'date': '07/03/2012'},
    {'address': '5416 N CLARK', 'date': '07/02/2012'},
    {'address': '5417 N CLARK', 'date': '07/02/2012'},
    {'address': '5418 N CLARK', 'date': '07/01/2012'},
    {'address': '5419 N CLARK', 'date': '07/04/2012'},
]
# Collect the rows under their date; no prior sorting is required because
# every row is appended to the list stored for its own key.
row_date = defaultdict(list)
for row in rows:
    row_date[row['date']].append(row)
# Any group can then be looked up directly by its date.
for j in row_date['07/01/2012']:
    print(j)
# {'address': '5412 N CLARK', 'date': '07/01/2012'}
# {'address': '5418 N CLARK', 'date': '07/01/2012'}
© 版权声明
文章版权归作者所有,未经允许请勿转载。
THE END
暂无评论内容