- 帖子
- 26
- 精华
- 0
- 积分
- 137
- 阅读权限
- 20
- 注册时间
- 2017-12-12
- 最后登录
- 2018-6-15
|
# Matches the gallery payload embedded in the page script as
# ``gallery: JSON.parse("<json string literal>"),`` followed by a newline.
_GALLERY_PATTERN = re.compile(r'gallery: JSON\.parse\((.*?)\),\n', re.S)


def parse_page_detail(html, url):
    """Parse a detail (sub) page and download the gallery images it lists.

    Args:
        html: Raw HTML text of the detail page.
        url: URL of the detail page; echoed back in the result.

    Returns:
        A dict with keys ``'title'``, ``'url'`` and ``'images'`` (the list of
        cleaned image URLs) when the page contains a gallery payload with a
        ``sub_images`` entry; otherwise ``None``.
    """
    soup = BeautifulSoup(html, 'lxml')
    title_nodes = soup.select('title')
    # Guard against pages without a <title> instead of raising IndexError.
    title = title_nodes[0].get_text() if title_nodes else ''

    match = _GALLERY_PATTERN.search(html)
    if not match:
        return None

    try:
        # The captured text is a JSON *string literal* whose content is itself
        # JSON (that is what JSON.parse("...") receives in the page), so the
        # first decode yields a str and a second decode yields the dict.
        # Decoding twice replaces the previous eval() call, which executed
        # text scraped from a remote page and broke on true/false/null.
        data = json.loads(match.group(1))
        if isinstance(data, str):
            data = json.loads(data)
    except ValueError:
        # Malformed payload: treat it the same as "no gallery found".
        return None

    if not isinstance(data, dict) or 'sub_images' not in data:
        return None

    sub_images = data.get('sub_images') or []
    # Strip any leftover backslashes (escaped slashes such as "\/") and skip
    # entries that carry no URL. The cleaned URLs are what gets returned.
    images = [
        item.get('url').replace('\\', '')
        for item in sub_images
        if item.get('url')
    ]
    for image in images:
        download_image(image)

    return {
        'title': title,
        'url': url,
        'images': images,
    }
给你参考一下 |
|