- 帖子
- 1
- 精华
- 0
- 积分
- 5
- 阅读权限
- 10
- 注册时间
- 2017-12-25
- 最后登录
- 2017-12-25
|
本帖最后由 Luhuihui 于 2017-12-25 23:26 编辑
大家好!我想上网易云音乐爬取某位歌手的所有歌曲的歌词。于是我上网找了下别人的程序。但是遇到了一些问题,希望各位大神指教。
主要是这一句:
bs_obj = BeautifulSoup(r,'lxml')
报错显示:
bs4.FeatureNotFound: Couldn't find a tree builder with the features you requested: lxml. Do you need to install a parser library?
但是我已经在Python 3.6 中安装了lxml,界面也显示安装成功了。不知我该怎么解决这个问题呢?谢谢大家!
此外,
url = 'http://music.163.com/artist?id=' + str(singer_ID) #网易云音乐歌手主页的网址r = urllib.request.urlopen(url)
这里的urllib.request的用法我也有些困惑。源程序写的是: r=requests.get(url).text
但我一run就提示requests没有get这个attribute。不知这一行程序在python 3.6版本中该怎么改呢?
程序如下看第二个def的第三、四、五行)
# -*- coding:utf-8 -*-
import urllib #请求URL相关的操作的模块
import urllib.request
from bs4 import BeautifulSoup #Beautiful Soup 是通过解析文档为用户提供需要抓取的数据
import json #用 Python 语言来编码和解码JSON(JavaScript Object Notation)对象
import re # Python里的re模块提供了正则表达式(用于提取数据)的支持
import os #对目录,文件进行操作的模块
import requests
def mkdir(path): #定义一个函数,用于创建一开始不存在的路径
path = path.strip() # 去除首位空格
path = path.rstrip("\\")# 去除尾部 \ 符号
isExists = os.path.exists(path)
if not isExists:
os.makedirs(path) #若查找的路径并不存在,创建这个路径
return True
else:
return False
def get_music_ids_by_singer_id(singer_ID):#定义一个函数,通过歌手ID获取这个歌手所有歌曲的ID
url = 'http://music.163.com/artist?id=' + str(singer_ID) #网易云音乐歌手主页的网址
r = urllib.request.urlopen(url)
print(r)
bs_obj = BeautifulSoup(r,'lxml')
singer_name = bs_obj.select("#artist-name")
singer_name = singer_name[0].get('title')
t = bs_obj.find('textarea')
musics = json.loads(t.text.replace('(','[').replace(')',']').replace('\'','"'))
ids ={}
for music in musics:
ids[music['name']] = music['id']
return ids,singer_name
def get_lyric_by_music_id(music_id):#通过音乐的id得到歌词
lrc_url = 'http://music.163.com/api/song/lyric?' + 'id=' + str(music_id) + '&lv=1&kv=1&tv=-1'
lyric = requests.get(lrc_url)
json_obj = lyric.text
#print(json_obj)
j = json.loads(json_obj)
#print(type(j))#打印出来j的类型是字典
try:#部分歌曲没有歌词,这里引入一个异常
lrc = j['lrc']['lyric']
pat = re.compile(r'\[.*\]')#下面这三行正则匹配删除时间轴
lrc = re.sub(pat,"",lrc)
lrc = lrc.strip()
return lrc
except KeyError as e:
pass
(music_id_set,singer_name_all) = get_music_ids_by_singer_id(5781)
print(music_id_set)
singer_name = singer_name_all.split(' -')[0]
#print(singer_name)
mkpath = '.\\' + singer_name + '\\'
mkdir(mkpath)
for key in music_id_set:
lrc_content = get_lyric_by_music_id(music_id_set[key])
# print(lrc_content)
# print(key)#歌名
#print(singer_name)
f = open(mkpath + key +'.txt', 'w',encoding='utf-8')
try: # 引入异常
#print(type(lrc_content.encode('utf-8')))
f.write(lrc_content)
f.close()
except AttributeError as e2:
pass
目前的报错信息:
Traceback (most recent call last):
File "E:/Python 程序/xue E.py", line 53, in <module>
(music_id_set,singer_name_all) = get_music_ids_by_singer_id(5781)
File "E:/Python 程序/xue E.py", line 25, in get_music_ids_by_singer_id
bs_obj = BeautifulSoup(r,'lxml')
File "E:\Python 3.6\lib\bs4\__init__.py", line 165, in __init__
% ",".join(features))
bs4.FeatureNotFound: Couldn't find a tree builder with the features you requested: lxml. Do you need to install a parser library?
Process finished with exit code 1
|
|