- 帖子
- 1
- 精华
- 0
- 积分
- 5
- 阅读权限
- 10
- 注册时间
- 2020-10-14
- 最后登录
- 2020-10-14
|
#-*- coding:utf-8 -*-
import re
from bs4 import BeautifulSoup
import xlwt
import sqlite3
import urllib.request,urllib.error
import urllib.parse
def main():
    """Call ``gethtml`` and then ``getdata`` with the Douban Top 250 URL."""
    top250_url = "https://movie.douban.com/top250?start=0"
    gethtml(url=top250_url)
    getdata(baseurl=top250_url)
# def find_thing():
def gethtml(url):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    The request carries a desktop-browser ``user-agent`` header
    (presumably because the site rejects urllib's default agent -- confirm).
    The decoded page is printed to stdout, as before, and is now also
    returned so that callers can parse it.

    Args:
        url: Absolute URL of the page to download.

    Returns:
        The page content as a ``str``.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36"
    }
    req = urllib.request.Request(url, headers=headers)
    # The context manager closes the connection even if read/decode raises.
    with urllib.request.urlopen(req) as response:
        html = response.read().decode("utf-8")
    print(html)
    # Returning the text is the actual fix: without it the caller got None
    # and BeautifulSoup failed with "object of type 'NoneType' has no len()".
    return html
def getdata(baseurl):
    """Collect every ``<div class="item">`` element from the listing page(s).

    For each page index ``i`` in ``range(0, 1)`` (currently only index 0),
    the offset ``i * 25`` is appended as text to *baseurl*, the page is
    downloaded with ``gethtml`` and parsed with BeautifulSoup's
    ``html.parser``.

    Args:
        baseurl: URL prefix to which the numeric offset is appended.
            NOTE(review): the caller in this file passes a URL that already
            ends in ``start=0``, so the requested URL ends in ``start=00`` --
            confirm that this is intended.

    Returns:
        A list of the matched ``div.item`` elements, in document order.
        (Previously the list was built but never returned.)
    """
    datalist = []
    for i in range(0, 1):
        url = baseurl + str(i * 25)
        html = gethtml(url)
        soup = BeautifulSoup(html, "html.parser")
        # find_all already yields the items in order; extend keeps them all.
        datalist.extend(soup.find_all('div', class_="item"))
    return datalist
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Traceback (most recent call last):
File "C:/Users/11929/PycharmProjects/untitled1/htmlfirst.py", line 47, in <module>
main()
File "C:/Users/11929/PycharmProjects/untitled1/htmlfirst.py", line 13, in main
getdata(baseurl = "https://movie.douban.com/top250?start=0")
File "C:/Users/11929/PycharmProjects/untitled1/htmlfirst.py", line 37, in getdata
soup = BeautifulSoup(html,"html.parser")
File "C:\Users\11929\AppData\Local\Programs\Python\Python36\lib\site-packages\bs4\__init__.py", line 310, in __init__
elif len(markup) <= 256 and (
TypeError: object of type 'NoneType' has no len()
|
|