前两天我在上班的时候一直被网站拦截,突然冒出一个想法:如果可以不停地更换代理,会不会好点呢?说着我就打开神器百度开始搜索了。
在经过一番搜索之后,我发现网上大部分文章都指向了一个地址:
HKEY_CURRENT_USER\Software\Microsoft\Windows\CurrentVersion\Internet Settings,应该是通过修改IE注册表的方式来实现的。相比平常在脚本里设置代理,我更喜欢这种修改注册表的方式。经过一番复制粘贴,我完成了最初版本的代码:
#!/usr/bin/python
# -*- coding: utf-8 -*-
import io, sys, time, re, os,urllib2
from bs4 import BeautifulSoup
import _winreg
import urllib
import socket
# Browser-like User-Agent sent with every scraping request.
User_Agent = 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0'
# Header dict passed to urllib2.Request.
header = {'User-Agent': User_Agent}
# Fetch candidate proxy addresses.
def getProxyIp():
    """Scrape "ip:port" proxy candidates from the xicidaili.com listing.

    Requests the pages ``/nn/0`` .. ``/nn/9`` and, for every table row after
    the first (header) row, joins the text of the second and third ``<td>``
    cells with a colon.

    Returns:
        list: the collected "ip:port" strings (possibly empty). A page that
        cannot be fetched or parsed is reported and skipped.
    """
    proxy = []
    for page in range(10):
        url = 'http://www.xicidaili.com/nn/' + str(page)
        try:
            req = urllib2.Request(url, headers=header)
            # A timeout keeps one stalled page from hanging the whole crawl.
            res = urllib2.urlopen(req, timeout=10).read()
            rows = BeautifulSoup(res, "html.parser").findAll('tr')
        except Exception as e:
            # Best effort: report and move on to the next page. Unlike a bare
            # ``except`` this still lets Ctrl-C / SystemExit through.
            print("skip %s: %s" % (url, e))
            continue
        for row in rows[1:]:
            cells = row.findAll("td")
            # Skip a malformed row instead of abandoning the rest of the page.
            if len(cells) < 3 or not cells[1].contents or not cells[2].contents:
                continue
            proxy.append(cells[1].contents[0] + ":" + cells[2].contents[0])
    return proxy
# Validate proxies.
def testProxys(proxys):
    """Return the subset of *proxys* that can fetch the IP echo page.

    Each "ip:port" entry is used as both the HTTP and HTTPS proxy for a
    single request to ``http://ip.chinaz.com/getip.aspx`` (5 second timeout).
    A proxy counts as valid when the response body matches
    ``{ip:...,address:...}``.

    Args:
        proxys: iterable of "ip:port" strings.

    Returns:
        list: the proxies that passed, in input order.
    """
    validProxys = []
    Url = "http://ip.chinaz.com/getip.aspx"
    expected = re.compile('{ip:.*?,address:..*?}')
    for proxy in proxys:
        try:
            proxy_handler = urllib2.ProxyHandler({'http': proxy, 'https': proxy})
            opener = urllib2.build_opener(proxy_handler)
            # Use the opener directly rather than install_opener(): installing
            # it would leave the last tested proxy active for every later
            # urllib2.urlopen() call in the process.
            response = opener.open(Url, timeout=5).read()
            # Keep only proxies that return the expected echo payload.
            if expected.search(response):
                validProxys.append(proxy)
                print("%s\t%s" % (proxy, response))
        except Exception:
            # Any failure (refused, timeout, bad status line, ...) simply
            # means this proxy is unusable.
            print("%s\t%s" % (proxy, "invalid"))
            continue
    return validProxys
# Turn the proxy on.
def enableProxy(IP, Port):
    """Enable the per-user Internet Settings proxy and point it at IP:Port.

    Sets ``ProxyEnable`` to 1 and ``ProxyServer`` to ``"<IP>:<Port>"`` under
    ``HKEY_CURRENT_USER\\Software\\Microsoft\\Windows\\CurrentVersion\\Internet Settings``.
    Registry errors are printed, not raised.

    Args:
        IP: proxy host as a string.
        Port: proxy port (converted with ``str``).
    """
    proxy = IP + ":" + str(Port)
    xpath = r"Software\Microsoft\Windows\CurrentVersion\Internet Settings"
    key = None
    try:
        key = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, xpath, 0, _winreg.KEY_WRITE)
        _winreg.SetValueEx(key, "ProxyEnable", 0, _winreg.REG_DWORD, 1)
        _winreg.SetValueEx(key, "ProxyServer", 0, _winreg.REG_SZ, proxy)
    except Exception as e:
        print("ERROR: " + str(e.args))
    finally:
        # Release the registry handle; this is called repeatedly in a loop.
        if key is not None:
            _winreg.CloseKey(key)
# Turn the proxy off.
def disableProxy():
    """Disable the per-user Internet Settings proxy.

    Sets ``ProxyEnable`` to 0 and clears ``ProxyServer`` under
    ``HKEY_CURRENT_USER\\Software\\Microsoft\\Windows\\CurrentVersion\\Internet Settings``.
    Registry errors are printed, not raised.
    """
    proxy = ""
    xpath = r"Software\Microsoft\Windows\CurrentVersion\Internet Settings"
    key = None
    try:
        key = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, xpath, 0, _winreg.KEY_WRITE)
        _winreg.SetValueEx(key, "ProxyEnable", 0, _winreg.REG_DWORD, 0)
        _winreg.SetValueEx(key, "ProxyServer", 0, _winreg.REG_SZ, proxy)
    except Exception as e:
        print("ERROR: " + str(e.args))
    finally:
        # Release the registry handle even when a write failed.
        if key is not None:
            _winreg.CloseKey(key)
def main():
    """Scrape proxies, keep the working ones, and rotate through them.

    After disabling any current proxy, cycles forever over the validated
    proxies, enabling each one for 5 seconds. Returns immediately when no
    proxy passed validation; any other error ends the rotation and is printed.
    """
    proxy = getProxyIp()
    # Keep the validated list: the loop below iterates over it.
    validProxys = testProxys(proxy)
    if not validProxys:
        # Without this guard the ``while True`` below would spin forever.
        print("No valid proxy found")
        return
    try:
        disableProxy()
        while True:
            for entry in validProxys:
                iplist = entry.split(':')
                ip = iplist[0]
                port = iplist[1]
                print("This is IP " + "\"" + ip + "\"" + ':' + port)
                # Pass the bare host: enableProxy() concatenates it into the
                # ProxyServer value, so surrounding quotes would end up
                # verbatim in the registry.
                enableProxy(ip, port)
                time.sleep(5)
    except Exception as e:
        print("ERROR: " + str(e.args))


if __name__ == '__main__':
    main()
然后我兴致勃勃的去实验了一番,嗯,换了一次,然后,欸,网页咋打不开了呢?
然后我又搜索另一篇文章看到:其实真正的注册表是
HKCU\Software\Microsoft\Windows\CurrentVersion\Internet Settings\Connections
下面的 DefaultConnectionSettings 值,它以16进制(二进制)格式保存代理设置。我也开始怀疑,写第一种方法的人是不是只实验了一次。然后我又看了这篇文章,它主要是通过命令行的形式去指定一个IP与端口。我就想,要是可以自动切换该多好:IP我就从网上爬。其实我后来用到了scylla这个python库,它里面提供了一些可以使用的代理。
然后我安装了scylla,先用python把它提供的IP与端口爬下来,再进行验证,最后遍历可用的代理,每隔一段时间设置一次,这样就基本实现了我的想法。在经过了一番纠结之后,我完成了这个小工具,代码如下:
#!/usr/bin/python
# -*- coding:utf-8 -*-
import os, sys, re,requests,urllib2,time
# Request headers; change the User-Agent here if needed.
User_Agent = 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0'
header = {'User-Agent': User_Agent}
# Proxy source: free public listing.
def getfreeProxyIp():
    """Scrape "ip:port" proxy candidates from the xicidaili.com listing.

    Requests the pages ``/nn/0`` .. ``/nn/9`` and, for every table row after
    the first (header) row, joins the text of the second and third ``<td>``
    cells with a colon.

    Returns:
        list: the collected "ip:port" strings (possibly empty). A page that
        cannot be fetched or parsed is reported and skipped.
    """
    # This script's import line does not bring in BeautifulSoup; without this
    # import every page failed with a NameError that was silently swallowed.
    from bs4 import BeautifulSoup

    proxy = []
    for page in range(10):
        url = 'http://www.xicidaili.com/nn/' + str(page)
        try:
            req = urllib2.Request(url, headers=header)
            # A timeout keeps one stalled page from hanging the whole crawl.
            res = urllib2.urlopen(req, timeout=10).read()
            rows = BeautifulSoup(res, "html.parser").findAll('tr')
        except Exception as e:
            # Best effort: report and move on to the next page. Unlike a bare
            # ``except`` this still lets Ctrl-C / SystemExit through.
            print("skip %s: %s" % (url, e))
            continue
        for row in rows[1:]:
            cells = row.findAll("td")
            # Skip a malformed row instead of abandoning the rest of the page.
            if len(cells) < 3 or not cells[1].contents or not cells[2].contents:
                continue
            proxy.append(cells[1].contents[0] + ":" + cells[2].contents[0])
    return proxy
# Proxy source: scylla HTTP API.
def getProxyIp(url='http://192.168.110.128:8899/api/v1/proxies', timeout=10):
    """Fetch "ip:port" proxy candidates from a scylla instance.

    Args:
        url: scylla proxies endpoint; defaults to the address originally
            hard-coded here.
        timeout: seconds to wait for the HTTP request before giving up.

    Returns:
        list: one "ip:port" string per entry of the response's ``"proxies"``
        list. Entries lacking ``ip``/``port`` are skipped; a response without
        a usable ``"proxies"`` list yields ``[]``.

    Raises:
        requests.RequestException: when the request itself fails.
        ValueError: when the response body is not JSON.
    """
    r = requests.get(url, timeout=timeout).json()
    proxy = []
    try:
        entries = list(r["proxies"])
    except (KeyError, TypeError, IndexError):
        return proxy
    for item in entries:
        try:
            proxy.append(str(item['ip']) + ":" + str(item['port']))
        except (KeyError, TypeError, ValueError):
            # One malformed entry should not discard the remaining ones.
            continue
    return proxy
# Validate proxies and keep the usable ones.
def testProxys(proxys):
    """Return the subset of *proxys* that can fetch the IP echo page.

    Each "ip:port" entry is used as both the HTTP and HTTPS proxy for a
    single request to ``http://ip.chinaz.com/getip.aspx`` (5 second timeout).
    A proxy counts as valid when the response body matches
    ``{ip:...,address:...}``.

    Args:
        proxys: iterable of "ip:port" strings.

    Returns:
        list: the proxies that passed, in input order.
    """
    validProxys = []
    Url = "http://ip.chinaz.com/getip.aspx"
    expected = re.compile('{ip:.*?,address:..*?}')
    for proxy in proxys:
        try:
            proxy_handler = urllib2.ProxyHandler({'http': proxy, 'https': proxy})
            opener = urllib2.build_opener(proxy_handler)
            # Use the opener directly rather than install_opener(): installing
            # it would leave the last tested proxy active for every later
            # urllib2.urlopen() call in the process.
            response = opener.open(Url, timeout=5).read()
            # Keep only proxies that return the expected echo payload.
            if expected.search(response):
                validProxys.append(proxy)
                # Two placeholders need two values; the former one-value tuple
                # raised a TypeError that the except below swallowed.
                print("%s\t%s" % (proxy, response))
        except Exception:
            # Any failure simply means this proxy is unusable.
            continue
    return validProxys
# Apply proxy settings through the registry.
def regIESettings(op, noLocal=False, ip='', pac=''):
    """Write the ``DefaultConnectionSettings`` registry value via a .reg file.

    Builds the comma-separated hex payload for
    ``HKCU\\Software\\Microsoft\\Windows\\CurrentVersion\\Internet Settings\\Connections``,
    writes it to ``DefaultConnectionSettings.reg`` in the current working
    directory and imports it with ``reg import``.

    Args:
        op: mode name: one of 'On', 'Off', 'ProxyOnly', 'PacOnly',
            'ProxyAndPac', 'D', 'DIP', 'DS'. A falsy value is a no-op; an
            unknown value prints a message and returns.
        noLocal: when true, the hex of ``<local>`` is embedded in the payload.
        ip: proxy address text (e.g. "1.2.3.4:8080"); may be empty. For modes
            whose name contains 'Proxy', a non-empty value must contain an
            IPv4-looking pattern.
        pac: text appended, hex-encoded, at the end of the payload; may be
            empty. Presumably a PAC URL, to be confirmed.

    Returns:
        None.
    """
    if not op:
        return
    # In a proxy mode a non-empty address must look like an IPv4[:port].
    if 'Proxy' in op and ip != '':
        ip_pattern = r'([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})\s*:{0,1}\s*([0-9]{1,5}){0,1}'
        if not re.findall(ip_pattern, ip):
            print('---Unexpected IP Address:%s---' % ip)
            return
    # Mode name -> flag byte written into the payload.
    # NOTE(review): the meaning of each flag byte is not verified here.
    options = {'On': '0F', 'Off': '01', 'ProxyOnly': '03', 'PacOnly': '05',
               'ProxyAndPac': '07', 'D': '09', 'DIP': '0B', 'DS': '0D'}
    if op == 'Off':
        reg_value = '46,00,00,00,00,00,00,00,01'
    else:
        switcher = options.get(op)
        if not switcher:
            print('\n---Unexpected Option. Please check the value after [-o]---\n')
            return
        skipLocal = '07,00,00,00,%s' % __toHex('<local>') if noLocal else '00'
        # Payload layout is kept exactly as before; only the unused
        # 'infoLen' entry was dropped from the substitution dict.
        reg_value = ('46,00,00,00,00,00,00,00,%(switcher)s,00,00,00,%(ipLen)s,00,00,00,'
                     '%(ip)s00,00,00,%(skipLocal)s,21,00,00,00%(pac)s') % {
            'switcher': switcher,
            'ipLen': __toHex(len(ip)),
            'ip': __toHex(ip) + ',' if ip else '',
            'skipLocal': skipLocal,
            'pac': ',' + __toHex(pac) if pac else '',
        }
    settings = ('Windows Registry Editor Version 5.00\n'
                '[HKEY_CURRENT_USER\\Software\\Microsoft\\Windows\\CurrentVersion'
                '\\Internet Settings\\Connections]\n'
                '"DefaultConnectionSettings"=hex:%s') % reg_value
    # === Generate the .reg file and import it into the registry ===
    # os.path.join avoids a doubled separator when the cwd is a drive root.
    filePath = os.path.join(os.getcwd(), 'DefaultConnectionSettings.reg')
    with open(filePath, 'w') as f:
        f.write(settings)
    cmd = 'reg import "%s"' % filePath
    result = os.popen(cmd)
    try:
        output = result.readlines()
    finally:
        # Close the pipe so repeated calls do not leak handles.
        result.close()
    if len(output) < 2:
        print('')
    return
def __toHex(obj):
    """Render *obj* as comma-separated lowercase hex bytes for a .reg payload.

    Args:
        obj: a ``str`` (each character becomes one hex byte) or an ``int``
            (rendered as a single hex number).

    Returns:
        str: ``''`` for the empty string; ``'00'`` for ``0``, ``'0'`` and
        ``'00'`` (the string forms are special-cased, not character-encoded);
        otherwise the hex text, with every value zero-padded to at least two
        digits (7 -> ``'07'``, ``'\\t'`` -> ``'09'``). ``None`` for any other
        type.
    """
    if obj == '':
        return ''
    if obj == 0 or obj == '0' or obj == '00':
        return '00'
    if isinstance(obj, str):
        # Pad each character too, so bytes below 0x10 stay two digits wide
        # like the int branch.
        return ','.join('%02x' % ord(ch) for ch in obj)
    if isinstance(obj, int):
        return '%02x' % obj
    return None
def main():
    """Rotate the system proxy through the validated scylla proxies.

    Turns the proxy off, fetches and validates candidates, then cycles over
    the valid ones forever, applying each with ``op='ProxyOnly'`` for 20
    seconds. Returns immediately when no proxy passed validation.
    """
    regIESettings(op='Off', ip='', pac='', noLocal=False)
    proxy = getProxyIp()
    validProxys = testProxys(proxy)
    if not validProxys:
        # Without this guard the ``while True`` below would spin at full CPU.
        print('No usable proxy found')
        return
    print('---Start agent---')
    while True:
        for ip in validProxys:
            try:
                print('Being used' + ip)
                regIESettings(op='ProxyOnly', ip=ip, pac='', noLocal=False)
                time.sleep(20)
            except Exception:
                # ``Exception`` rather than a bare except, so Ctrl-C during
                # the sleep actually stops the rotation.
                print('GG!')


if __name__ == '__main__':
    main()
代码大概就是这样子的,然后我最后打包了一些程序,实现效果大概是这样子的:
原创文章,作者:Y4er,未经授权禁止转载!如若转载,请联系作者:Y4er


微信扫一扫
支付宝扫一扫 