- 帖子
- 12
- 精华
- 0
- 积分
- 63
- 阅读权限
- 20
- 注册时间
- 2013-9-11
- 最后登录
- 2013-10-3
|
用 urllib2 伪造了 Referer 和 User-Agent，终于抓到了：
- import urllib2, HTMLParser
# Hrefs collected by MyParser, in document order. Kept at module level so
# the calling script can read the links after feeding the parser.
article_list = []


class MyParser(HTMLParser.HTMLParser):
    """HTML parser that records the ``href`` of every ``<a>`` start tag.

    Each non-empty href is printed and appended to the module-level
    ``article_list``.
    """

    def handle_starttag(self, tag, attrs):
        """Collect href values from anchor tags.

        ``tag`` is the tag name and ``attrs`` is a list of
        ``(name, value)`` pairs, as passed in by HTMLParser.
        """
        if tag != 'a':
            return
        for name, value in attrs:
            # A bare ``<a href>`` reports its value as None and ``href=""``
            # as an empty string; neither is a fetchable link, so skip them
            # instead of letting them reach the downloader.
            if name == 'href' and value:
                print(value)
                article_list.append(value)
def fetch_data(uri, timeout=None):
    """Open ``uri`` with spoofed request headers and return the response.

    Args:
        uri: URL to open.
        timeout: optional socket timeout in seconds. ``None`` (the default)
            calls ``urllib2.urlopen`` without a timeout argument, exactly as
            before this parameter existed.

    Returns:
        The file-like response object from ``urllib2.urlopen``. The caller
        is responsible for reading (and closing) it.

    Raises:
        Whatever ``urllib2.urlopen`` raises for the request, e.g.
        ``urllib2.URLError`` on a connection failure.
    """
    request = urllib2.Request(uri)
    # Fixed Referer / User-Agent values: the accompanying post reports that
    # the fetch only succeeded once these two headers were faked.
    request.add_header('Referer', 'http://chuansongme.com/account/crossincode')
    request.add_header('Content-Type', 'application/x-www-form-urlencoded')
    request.add_header('User-Agent', 'fake-client')
    if timeout is None:
        # Leave urllib2's own default timeout handling untouched.
        return urllib2.urlopen(request)
    return urllib2.urlopen(request, timeout=timeout)
# Download the "recent articles" listing and dump the raw response.
list_str = fetch_data('http://chuansongme.com/more/account-crossincode/recent?lastindex=0').read()
print(list_str)

# Feeding the parser fills article_list with every <a href> in the listing.
my = MyParser()
my.feed(list_str.decode('utf-8'))

# Fail with a readable message rather than a bare IndexError when the
# listing contained no links at all.
if not article_list:
    raise SystemExit('no article links found in the listing page')

# Fetch the first collected link and dump it as well.
article = fetch_data(article_list[0]).read()
print(article)

# The response body is raw bytes, so write in binary mode to store it
# unmodified; the with-block closes the file even if the write fails.
with open('weixin.html', 'wb') as f:
    f.write(article)
复制代码 |
|