2.数据结构和算法(二)

将键映射到多个值当中

# The plain way to build a multi-value mapping: spell out each key's list.
d = dict(a=[1, 2, 3], b=[4, 5, 6])

#使用defaultdict创建字典
>>> import collections
>>> d = collections.defaultdict(list)
>>> d["a"].append(1)
>>> d["a"].append(2)
>>> d
defaultdict(<class 'list'>, {'a': [1, 2]})
>>> d["b"].append(3)
>>> d
defaultdict(<class 'list'>, {'a': [1, 2], 'b': [3]})
>>> e=collections.defaultdict(set)
>>> e["a"].add(3)
>>> e["a"].add(4)
>>> e
defaultdict(<class 'set'>, {'a': {3, 4}})

让字典保持有序

#OrderedDict能够让字典按照加入的顺序进行排列

from collections import OrderedDict
# Build the mapping from (key, value) pairs; OrderedDict keeps them in
# exactly the order they were inserted.
d = OrderedDict([
    ('foo', 1),
    ('bar', 2),
    ('spam', 3),
    ('grok', 4),
])

# Iteration walks the keys in insertion order.
for key in d:
    print(key, d[key])

#foo 1
#bar 2
#spam 3
#grok 4

OrderedDict在构建JSON时比较有用,可以让各个字段按照固定的顺序进行序列化

#OrderedDict能够让字典按照加入的顺序进行排列
from collections import OrderedDict
import json

# Pair the field names with their values in the order they should appear.
d = OrderedDict(zip(('foo', 'bar', 'spam', 'grok'), (1, 2, 3, 4)))

# json.dumps emits the fields in the mapping's iteration order.
a = json.dumps(d)
print(a)

#{"foo": 1, "bar": 2, "spam": 3, "grok": 4}

OrderedDict内部维护了一个双向链表来记录键的插入顺序,因此占用的内存比普通字典大

与字典有关的计算问题

可以使用zip()来翻转字典的值和键

prices = dict(ACME=45.23, AAPL=612.78, IBM=205.55, HPQ=37.20)

# zip() produces (price, ticker) tuples, so min()/max() compare on the
# price first and carry the ticker along with it.
min_price = min(zip(prices.values(), prices))
print(min_price)

# A zip iterator can be consumed only once, hence a fresh one for max().
max_price = max(zip(prices.values(), prices))
print(max_price)

# (37.2, 'HPQ')
# (612.78, 'AAPL')

请注意,zip()创建了一个迭代器,它的内容只能被使用一次

在两个字典中寻找相同点

可以使用keys()或者items()方法执行常见的集合操作,可以找到两个字典当中相似的地方

a = dict(x=1, y=2, z=3)
b = dict(w=10, x=11, y=2)

# Key and item views support set operators directly.
print(
    a.keys() & b.keys(),    # keys present in both dicts
    a.keys() - b.keys(),    # keys only in a
    a.items() & b.items(),  # (key, value) pairs present in both dicts
    sep="\n",
)

# {'y', 'x'}
# {'z'}
# {('y', 2)}

字典的values()视图不支持集合操作,因为并不能够保证所有的值都是唯一的

从序列中移除重复项且保持元素间顺序不变

如果序列中的元素是可哈希的,那么可以使用以下方法

def dedupe(items, key=None):
    """Yield the elements of ``items`` in order, skipping repeated ones.

    Args:
        items: Any iterable. When ``key`` is not given, its elements must
            be hashable.
        key: Optional one-argument callable that turns an element into a
            hashable value used to decide whether it was already seen.
            This makes unhashable elements (e.g. dicts) usable. With the
            default ``None`` the elements themselves are compared.

    Yields:
        Each element whose (keyed) value has not been produced before.
    """
    seen = set()
    for item in items:
        # Track the derived value, but always hand back the original element.
        marker = item if key is None else key(item)
        if marker not in seen:
            yield item
            seen.add(marker)

a = [4, 34, 2, 3, 3, 5, 5, 6, 6]

# Print the unique values on one line, each followed by a single space.
print(*dedupe(a), end=" ")

# 4 34 2 3 5 6
a = [4, 34, 2, 3, 3, 5, 5, 6, 6]

# A set removes the duplicates but gives no guarantee about element order.
b = {*a}

print(b)

#{34, 3, 2, 4, 5, 6}

对切片命名

可以使用slice()函数来创建切片对象

items = list(range(7))

# A slice object can be stored under a name and reused as an index.
a = slice(2, 4)
print(items[a])

# [2, 3]
items = list(range(7))
a = slice(2, 4)
print(items[a])

# Show the slice object's attributes, one per line; step is None when
# it was not given.
print(a.start, a.stop, a.step, sep="\n")

# [2, 3]
# 2
# 4
# None

使用indices(size)将切片映射到特定大小的序列上

items = list(range(7))
a = slice(2, 4)

print(a)

# indices(size) returns a (start, stop, step) tuple fitted to a sequence
# of the given length.
tup = a.indices(len(items))
print(tup)

# slice(2, 4, None)
# (2, 4, 1)

找出序列中出现次数最多的元素

from collections import Counter
words = [
    'look', 'into', 'my', 'eyes',
    'look', 'into', 'my', 'eyes',
    'the', 'eyes', 'the', 'eyes',
]

# Tally the words by feeding them into an initially empty counter.
coun_word = Counter()
coun_word.update(words)

# Take the three most frequent entries.
top_three = coun_word.most_common(3)
print(top_three)

# [('eyes', 4), ('look', 2), ('into', 2)]
from collections import Counter
words = [
    'look', 'into', 'my', 'eyes',
    'look', 'into', 'my', 'eyes',
    'the', 'eyes', 'the', 'eyes',
]
word_2 = ['look', 'into', 'my', 'eyes']

coun_word = Counter(words)
coun_word2 = Counter(word_2)

print(coun_word, coun_word2, sep="\n")

# Counters support arithmetic: subtraction keeps only positive counts,
# addition sums the counts per key.
print(coun_word - coun_word2, coun_word + coun_word2, sep="\n")

# Counter({'eyes': 4, 'look': 2, 'into': 2, 'my': 2, 'the': 2})
# Counter({'look': 1, 'into': 1, 'my': 1, 'eyes': 1})

# Counter({'eyes': 3, 'the': 2, 'look': 1, 'into': 1, 'my': 1})
# Counter({'eyes': 5, 'look': 3, 'into': 3, 'my': 3, 'the': 2})

对不原生支持比较操作的对象排序

sorted函数可接受一个用来传递可调用对象的参数key,该调用对象会返回待排序对象中某些值,sorted则利用这些值来进行排序

对于函数max()和min(),同样可以使用key参数来提取用于比较的值

import operator
class User:
    """Minimal record type used to demonstrate sorting objects by attribute."""

    # Class-level default; every instance overrides it in __init__.
    id = 0

    def __init__(self, id):
        """Store ``id`` on the instance; the examples use it as the sort key."""
        self.id = id

    def __repr__(self):
        """Return a readable form such as ``User(4)``.

        Without this, printing a list of users shows only object addresses.
        """
        return "{}({!r})".format(type(self).__name__, self.id)

    def print_user(self):
        """Print this user's id followed by a space, without a newline."""
        print(self.id, end=" ")
a = [User(number) for number in (4, 3, 6, 7)]

# Sort by pulling the key out with a lambda.
b = sorted(a, key=lambda user: user.id)

# Sort with attrgetter, which can also take several attribute names.
by_id = operator.attrgetter('id')
c = sorted(a, key=by_id)

for i in b:
    i.print_user()
print()
for j in c:
    j.print_user()

# 3 4 6 7
# 3 4 6 7

根据字段将记录分组

from operator import itemgetter
from itertools import groupby
rows = [
    {'address': '5412 N CLARK', 'date': '07/01/2012'},
    {'address': '5413 N CLARK', 'date': '07/04/2012'},
    {'address': '5414 N CLARK', 'date': '07/02/2012'},
    {'address': '5415 N CLARK', 'date': '07/03/2012'},
    {'address': '5416 N CLARK', 'date': '07/02/2012'},
    {'address': '5417 N CLARK', 'date': '07/02/2012'},
    {'address': '5418 N CLARK', 'date': '07/01/2012'},
    {'address': '5419 N CLARK', 'date': '07/04/2012'},
]

# One key function shared by the sort and the grouping.
by_date = itemgetter('date')

# groupby only merges consecutive items, so the rows must be sorted by
# the same key first.
rows.sort(key=by_date)

# Each step yields the key value and an iterator over that key's rows.
for data, items in groupby(rows, key=by_date):
    # The date is followed by one blank line.
    print(data, end="\n\n")
    for i in items:
        print(i)
# 07/01/2012

# {'address': '5412 N CLARK', 'date': '07/01/2012'}
# {'address': '5418 N CLARK', 'date': '07/01/2012'}
# 07/02/2012

# {'address': '5414 N CLARK', 'date': '07/02/2012'}
# {'address': '5416 N CLARK', 'date': '07/02/2012'}
# {'address': '5417 N CLARK', 'date': '07/02/2012'}
# 07/03/2012

# {'address': '5415 N CLARK', 'date': '07/03/2012'}
# 07/04/2012

# {'address': '5413 N CLARK', 'date': '07/04/2012'}
# {'address': '5419 N CLARK', 'date': '07/04/2012'}

如果只是简单地进行分组,那么可以放进一个大的数据结构

from collections import defaultdict
rows = [
    {'address': '5412 N CLARK', 'date': '07/01/2012'},
    {'address': '5413 N CLARK', 'date': '07/04/2012'},
    {'address': '5414 N CLARK', 'date': '07/02/2012'},
    {'address': '5415 N CLARK', 'date': '07/03/2012'},
    {'address': '5416 N CLARK', 'date': '07/02/2012'},
    {'address': '5417 N CLARK', 'date': '07/02/2012'},
    {'address': '5418 N CLARK', 'date': '07/01/2012'},
    {'address': '5419 N CLARK', 'date': '07/04/2012'},
]

# Map each date to the list of rows carrying it; a missing date starts
# out as an empty list.
row_date = defaultdict(list)

for row in rows:
    same_day = row_date[row['date']]
    same_day.append(row)

# Look up one date's rows directly; no sorting is needed.
for j in row_date['07/01/2012']:
    print(j)

# {'address': '5412 N CLARK', 'date': '07/01/2012'}
# {'address': '5418 N CLARK', 'date': '07/01/2012'}
© 版权声明
THE END
喜欢就支持一下吧
点赞0
分享
评论 抢沙发
四曲的头像-四曲博客

昵称

取消
昵称表情代码图片