- 帖子
- 3
- 精华
- 0
- 积分
- 11
- 阅读权限
- 10
- 注册时间
- 2018-1-2
- 最后登录
- 2018-1-3
|
本帖最后由 tanshipa 于 2018-1-2 18:00 编辑
我在做一个微信朋友圈的投票器,是第三方厂商做的投票 https://cx.cms01.com ,
限制了每人每IP地址每天只能投一票,我想用爬虫突破这一限制,达到刷票的目的。
我引入了代理ip池,可以做到每次以一个新的代理ip地址发起访问,但是服务器是通过对cookie PHPSESSID来判断是不是同一个客户端,即便用不同的ip地址发起访问,还是不能绕过服务器限制。
想问怎么样才能每次获得一个新的PHPSESSID?
我觉得把代码贴上来应该更直观,这是完整的代码,老师们可以直接运行看结果...
# coding=UTF-8
from bs4 import BeautifulSoup
import requests
import random
import urllib2
import cookielib
def get_ip_list(url, headers):
    """Scrape address entries from the table rows of an HTML page.

    Downloads *url* with ``requests.get`` using the given *headers*, parses
    the response text with BeautifulSoup (``lxml`` parser) and walks every
    ``<tr>`` except the first one.

    Args:
        url: Address of the page to download.
        headers: Dict of HTTP headers forwarded to ``requests.get``.

    Returns:
        A list of ``"<2nd cell text>:<3rd cell text>"`` strings, one per row
        that has at least three ``<td>`` cells (presumably ``ip:port`` --
        verify against the page layout).
    """
    web_data = requests.get(url, headers=headers)
    soup = BeautifulSoup(web_data.text, 'lxml')
    rows = soup.find_all('tr')
    ip_list = []
    # The original index loop started at 1, i.e. it skipped the first row.
    for row in rows[1:]:
        tds = row.find_all('td')
        # Rows with too few cells would raise IndexError below; skip them.
        if len(tds) < 3:
            continue
        ip_list.append(tds[1].text + ':' + tds[2].text)
    return ip_list
def get_random_ip(ip_list):
    """Pick one entry of *ip_list* at random and wrap it as a proxies dict.

    Args:
        ip_list: Non-empty iterable of address strings such as
            ``"1.2.3.4:80"``.

    Returns:
        A dict with the single key ``'http'`` whose value is ``'http://'``
        followed by the chosen entry.

    Raises:
        IndexError: If *ip_list* is empty (raised by ``random.choice``).
    """
    proxy_list = ['http://' + ip for ip in ip_list]
    return {'http': random.choice(proxy_list)}
# Script entry point. Python 2 only: relies on urllib2, cookielib and the
# print statement.
# NOTE(review): the surrounding post says this script is meant to get around a
# one-vote-per-person/IP limit on a third-party voting service. That is
# presumably against the service's rules -- confirm authorization before use.
if __name__ == '__main__':
# Page that get_ip_list() scrapes for proxy addresses.
url = 'http://www.xicidaili.com/nn/'
# Browser-like User-Agent sent with the proxy-list request.
proxy_headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'
}
ip_list = get_ip_list(url, headers=proxy_headers)
proxies = get_random_ip(ip_list)
print(proxies)
print(proxies['http'])
# Proxy handler built from the randomly chosen proxies dict.
httpproxy_handler = urllib2.ProxyHandler(proxies)
# Build a CookieJar instance to hold the cookies
cookiejar = cookielib.CookieJar()
# Use HTTPCookieProcessor() to create the cookie handler; its argument is the CookieJar() object
cookie_handler = urllib2.HTTPCookieProcessor(cookiejar)
# First opener: combines the cookie handler and the proxy handler.
cookie_opener = urllib2.build_opener(cookie_handler, httpproxy_handler)
# 4. Access the page with a GET request; afterwards the cookies are saved into cookiejar automatically
cookie_headers = {
"Host": "cx.cms01.com",
"Connection": "keep-alive",
"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 MicroMessenger/6.5.2.501 NetType/WIFI WindowsWechat QBCore/3.43.691.400 QQBrowser/9.0.2524.400",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
"Accept-Language": "zh-CN,zh;q=0.8,en-us;q=0.6,en;q=0.5;q=0.4",
"Referer": "https://cx.cms01.com/app/index.php?c=entry&do=show&m=xiaof_toupiao&i=1&sid=63&id=3979&wxref=mp.weixin.qq.com&from=timeline"
}
# Only the last, uncommented URL is requested; the two above it are earlier
# alternatives left commented out.
cookie_request = urllib2.Request(
# "https://cx.cms01.com/app/index.php?c=entry&do=show&m=xiaof_toupiao&i=1&sid=63&id=3979&wxref=mp.weixin.qq.com&from=timeline",
# "https://cx.cms01.com/app/index.php?i=1&c=utility&a=visit&do=showjs&m=xiaof_toupiao",
"https://cx.cms01.com/app/index.php?c=entry&do=vote&m=xiaof_toupiao&i=1&sid=63&id=3979&type=click&wxref=mp.weixin.qq.com",
headers=cookie_headers)
# The response is discarded; the request is made so cookiejar gets populated.
cookie_opener.open(cookie_request)
## The saved cookies can be printed in the standard format
cookieStr = ""
for item in cookiejar:
cookieStr = cookieStr + item.name + "=" + item.value + ";"
# [:-1] drops the trailing ';' added by the loop above.
print cookieStr[:-1]
# Second opener: proxy handler only, no cookie processor; the cookie is put
# into the request headers by hand below.
opener = urllib2.build_opener(httpproxy_handler)
headers = {
"Host": "cx.cms01.com",
"Connection": "keep-alive",
"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 MicroMessenger/6.5.2.501 NetType/WIFI WindowsWechat QBCore/3.43.691.400 QQBrowser/9.0.2524.400",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
"Accept-Language": "zh-CN,zh;q=0.8,en-us;q=0.6,en;q=0.5;q=0.4",
"X-Requested-With": "XMLHttpRequest",
"Referer": "https://cx.cms01.com/app/index.php?c=entry&do=show&m=xiaof_toupiao&i=1&sid=63&id=3979&wxref=mp.weixin.qq.com&from=timeline&wxref=mp.weixin.qq.com",
# `item` is the loop variable left over from the cookie loop above, i.e. the
# last cookie that was iterated.
"Cookie": "PHPSESSID=" + item.value
}
print ('cookie in headers is ' + headers["Cookie"])
request = urllib2.Request(
"https://cx.cms01.com/app/index.php?c=entry&do=vote&m=xiaof_toupiao&i=1&type=good&id=3979",
headers=headers)
print (request.get_full_url())
response = opener.open(request)
# Dump the raw response body to stdout.
print response.read()
|
|