- 帖子
- 37
- 精华
- 0
- 积分
- 476
- 阅读权限
- 30
- 注册时间
- 2018-3-31
- 最后登录
- 2019-10-26
|
#-*-coding: utf-8 -*-
#D:\ProgramData\Anaconda3\envs\py36\python.exe code.py 127.0.0.1
import web
import urllib.request
import json
import time
urls = (
'/', 'index',
'/movie/(\d+)', 'movie',
)
render = web.template.render('templates/')
#web.py 连接 SQLite
db = web.database(dbn='sqlite', db='MovieSite.db')
def add_movie(data):
movie = json.loads(data)
#print (movie['title'])
db.insert('movie',
id = int(movie['id']),
title = movie['title'],
origin = movie['original_title'],
url = movie['alt'],
rating = movie['rating']['average'],
image = movie['images']['large'],
directors = ','.join([d['name'] for d in movie['directors']]),
casts = ','.join([c['name'] for c in movie['casts']]),
year = movie['year'],
genres = ','.join(movie['genres']),
countries = ','.join(movie['countries']),
summary = movie['summary'],
)
def movie_exist(data):
movie = json.loads(data)
n_id = int(movie['id'])
pass
class index:
def GET(self):
movies = db.select('movie')
return render.index(movies)
def POST(self):
data = web.input()
condition = r'title like "%' + data.title + r'%"'
movies = db.select('movie', where=condition)
return render.index(movies)
class movie:
def GET(self, movie_id):
condition = 'id=' + movie_id
movie = db.select('movie', where=condition)[0]
return render.movie(movie)
movie_ids = []
for index in range(0, 250, 50):
response = urllib.request.urlopen('http://api.douban.com/v2/movie/top250?start=%d&count=50' % index)
data = response.read()
data_json = json.loads(data)
movie250 = data_json['subjects']
for movie in movie250:
movie_ids.append(movie['id'])
#print(movie['id'], movie['title'])
time.sleep(3)
#print(movie_ids)
count = 0
for mid in movie_ids:
#print (count, mid)
try:
response = urllib.request.urlopen('http://api.douban.com/v2/movie/subject/%s' % mid)
data = response.read()
add_movie(data)
count += 1
time.sleep(3)
except:
print('movie %s is not found' % mid)
if __name__ == "__main__":
app = web.application(urls, globals())
app.run()
第一次从头开始sqlite3 MovieSite.db
create table movie (id, title, origin, url, rating, image, directors, casts, year, genres, countries, summary);然后运行上面的程序,进入网页报错
第二次把获取movie_ids和存入数据库的两段注释掉,再运行,就可以正常显示网页了(虽然也没全部抓取到,只有90多个)
麻烦老师看下这会是什么问题?
|
|