Crossin的编程教室

标题: gevent 和 multiprocessing.Pool出现卡死的情况这个怎么处理啊 [打印本页]

作者: q960126    时间: 2018-1-15 15:14
标题: gevent 和 multiprocessing.Pool出现卡死的情况这个怎么处理啊
# coding:utf-8
# gevent has to monkey-patch the standard library BEFORE any other module
# (requests -> urllib3 -> socket/ssl) is imported; otherwise those modules keep
# references to the original blocking primitives.
from gevent import monkey

# thread=False leaves `threading` unpatched. multiprocessing.Pool drives its
# task/result handlers with threading.Thread; when those are turned into
# greenlets they share one OS thread with blocking pipe reads.
# NOTE(review): this is the commonly reported cause of gevent + Pool hangs --
# confirm against the installed gevent version.
monkey.patch_all(thread=False)

import hashlib
import os
import re
import time
from multiprocessing import Pool

import gevent
import requests
from lxml import etree

# HTTP headers sent with every request made by this script.
header = {
'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36'
}
# Scratch notes left in the original script as bare string expressions
# (no runtime effect); kept as comments:
#   '105'
#   'https://tieba.baidu.com/p/5033202671?see_lz=1&pn=1'
def get_html(url, timeout=10):
    """Fetch ``url`` and return its decoded body text.

    Args:
        url: Page address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection blocks the caller indefinitely.

    Returns:
        The response text, or ``None`` if the request failed (network error,
        timeout, or a non-2xx status).
    """
    try:
        r = requests.get(url, headers=header, timeout=timeout)
        # Check the status first so charset detection is not run on error pages.
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException as exc:
        # Only request failures are treated as "best effort"; programming
        # errors and KeyboardInterrupt are no longer swallowed.
        print('get_html 出错', url, exc)
        return None
def download(image_url, save_dir='D:/python/贴吧', timeout=10):
    """Download one image and store it as ``<md5-of-content>.jpg``.

    Args:
        image_url: Address of the image to fetch.
        save_dir: Directory the file is written to; created if missing.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: On network failure, timeout, or a non-2xx
            status (so error pages are not saved as images).
        OSError: If ``save_dir`` cannot be created or the file cannot be
            written.
    """
    resp = requests.get(image_url, headers=header, timeout=timeout)
    resp.raise_for_status()
    content = resp.content

    # Several greenlets/processes get here at once, so "check then create"
    # would race; create unconditionally and tolerate "already exists".
    try:
        os.makedirs(save_dir)
    except OSError:
        if not os.path.isdir(save_dir):
            raise

    # Naming by content hash de-duplicates identical images.
    md5 = hashlib.md5(content).hexdigest()
    # Build an explicit path instead of os.chdir(): the working directory is
    # process-wide state shared by every greenlet.
    with open(os.path.join(save_dir, md5 + '.jpg'), 'wb') as f:
        f.write(content)

def parse_index(url):
    """Download every ``BDE_Image`` picture found on the page at ``url``.

    The page is fetched with ``get_html``; one greenlet per image is spawned
    running ``download``, and the call returns once all of them have finished.
    If the page could not be fetched, nothing is downloaded.

    Args:
        url: Address of the thread page to scan.
    """
    html = get_html(url)
    if not html:
        # get_html returns None on failure; there is nothing to parse.
        return

    sel = etree.HTML(html)
    if sel is None:
        # NOTE(review): lxml may hand back None for unparseable input -- confirm.
        return

    image_urls = sel.xpath('//img[@class="BDE_Image"]//@src')
    # A distinct loop name avoids shadowing the `url` parameter.
    jobs = [gevent.spawn(download, image_url) for image_url in image_urls]
    gevent.joinall(jobs)

def main():
    """Crawl pages 1-4 of the thread in a 4-process pool and print the time taken.

    Each page is handed to ``parse_index`` in a worker process. The result of
    every task is collected so that an exception raised inside a worker is
    reported instead of being silently dropped.
    """
    start_time = time.time()
    tp_url = 'https://tieba.baidu.com/p/5033202671?see_lz=1&pn={}'
    pool = Pool(4)
    pending = [
        (page, pool.apply_async(parse_index, args=(tp_url.format(page),)))
        for page in range(1, 5)
    ]
    pool.close()
    for page, result in pending:
        try:
            # get() re-raises whatever the worker raised; without it a failed
            # task looks exactly like a successful one.
            result.get()
        except Exception as exc:
            print('parse_index 出错', page, exc)
    pool.join()
    print('用时',time.time()-start_time)



# Run the crawl only when this file is executed as a script, not when it is
# imported as a module.
if "__main__" == __name__:
    main()



作者: crossin先生    时间: 2018-1-16 15:32
先确认是卡死在哪里
或者简化代码,先调通多进程的结构





欢迎光临 Crossin的编程教室 (https://bbs.crossincode.com/) Powered by Discuz! X2.5