Crossin的编程教室
标题:
gevent 和 multiprocessing.Pool出现卡死的情况这个怎么处理啊
[打印本页]
作者:
q960126
时间:
2018-1-15 15:14
标题:
gevent 和 multiprocessing.Pool出现卡死的情况这个怎么处理啊
# coding:utf-8
import
requests
import
time
from
lxml
import
etree
import
os
,
re
,
hashlib
import
gevent
from
gevent
import
monkey
from
multiprocessing
import
Pool
monkey.patch_all()
header = {
'user-agent'
:
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36'
}
'105'
'https://tieba.baidu.com/p/5033202671?see_lz=1&pn=1'
def
get_html
(url):
try
:
r = requests.get(url
,
headers
= header)
r.encoding = r.apparent_encoding
r.raise_for_status()
return
r.text
except
:
print
(
'get_html 出错'
)
return None
def
download
(image_url):
content = requests.get(image_url
,
headers
= header).content
path =
'D:/python/贴吧'
if not
os.path.exists(path):
os.makedirs(path)
os.chdir(path)
md5 = hashlib.md5(content).hexdigest()
with
open
(md5+
'.jpg'
,
'wb'
)
as
f:
f.write(content)
def
parse_index
(url):
sel = etree.HTML(get_html(url))
image_urls = sel.xpath(
'//img[@class="BDE_Image"]//@src'
)
tocks = []
for
url
in
image_urls:
tocks.append(gevent.spawn(download
,
url))
gevent.joinall(tocks)
def
main
():
start_time = time.time()
tp_url =
'https://tieba.baidu.com/p/5033202671?see_lz=1&pn={}'
pool = Pool(
4
)
for
i
in
range
(
1
,
5
):
pool.apply_async(parse_index
,
args
=(tp_url.format(i)
,
))
# parse_index(tp_url.format(i))
pool.close()
pool.join()
print
(
'用时'
,
time.time()-start_time)
if
__name__ ==
'__main__'
:
main()
作者:
crossin先生
时间:
2018-1-16 15:32
先确认是卡死在哪里
或者简化代码,先调通多进程的结构
欢迎光临 Crossin的编程教室 (https://bbs.crossincode.com/)
Powered by Discuz! X2.5