前两天上班的时候,我访问网站一直被拦截,突然冒出一个想法:如果可以不停地更换代理,会不会好一点呢?说干就干,我打开神器百度开始搜索。
在经过一番搜索之后,我发现网上大部分文章都指向了一个地址:
HKEY_CURRENT_USER\Software\Microsoft\Windows\CurrentVersion\Internet Settings ,应该是通过修改IE注册表的方式来实现的。相比平常在脚本里设置代理,我更喜欢这种修改注册表的方式。经过一番复制粘贴,我完成了最初版本的代码:
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Rotate the Windows "Internet Settings" proxy through scraped free proxies.

Python 2 script (uses ``urllib2`` and ``_winreg``). It scrapes candidate
proxies, keeps the ones that answer a test request, and then writes each one
in turn to the ``ProxyEnable`` / ``ProxyServer`` registry values.
"""
import io
import os
import re
import socket
import sys
import time
import urllib
import urllib2

import _winreg
from bs4 import BeautifulSoup

User_Agent = 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0'
header = {}
header['User-Agent'] = User_Agent

# Raw string so the backslashes are never interpreted as escape sequences.
INTERNET_SETTINGS_KEY = r"Software\Microsoft\Windows\CurrentVersion\Internet Settings"


# Fetch candidate proxies
def getProxyIp():
    """Scrape "ip:port" candidates from the xicidaili listing pages.

    Returns:
        A list of "ip:port" strings. Pages that cannot be downloaded or
        parsed, and table rows without the expected cells, are skipped.
    """
    proxy = []
    for page in range(10):
        url = 'http://www.xicidaili.com/nn/' + str(page)
        try:
            req = urllib2.Request(url, headers=header)
            # A timeout keeps one dead page from hanging the whole scrape.
            res = urllib2.urlopen(req, timeout=10).read()
            rows = BeautifulSoup(res, "html.parser").findAll('tr')
        except Exception:
            # Best effort: a failing page must not abort the other pages,
            # but Ctrl-C (KeyboardInterrupt) is deliberately not swallowed.
            continue
        # The first <tr> is the table header.
        for row in rows[1:]:
            tds = row.findAll("td")
            try:
                proxy.append(tds[1].contents[0] + ":" + tds[2].contents[0])
            except (IndexError, TypeError):
                # Row does not have the ip / port cells; skip only this row.
                continue
    return proxy


# Validate proxies
def testProxys(proxys):
    """Return the subset of ``proxys`` that can fetch the test URL.

    Args:
        proxys: iterable of "ip:port" strings.

    Returns:
        A list with the proxies whose response matched the expected pattern.
    """
    validProxys = []
    Url = "http://ip.chinaz.com/getip.aspx"
    for proxy in proxys:
        try:
            proxy_handler = urllib2.ProxyHandler({'http': proxy, 'https': proxy})
            # Use the opener directly instead of install_opener() so the
            # process-wide urllib2 state is not left pointing at a proxy.
            opener = urllib2.build_opener(proxy_handler)
            conn = opener.open(Url, timeout=5)
            try:
                response = conn.read()
            finally:
                conn.close()
        except Exception:
            print("%s\t%s" % (proxy, "invalid"))
            continue
        # Filter condition specific to the test site's response format.
        if re.findall('{ip:.*?,address:..*?}', response):
            validProxys.append(proxy)
            print("%s\t%s" % (proxy, response))
    return validProxys


def _writeProxySettings(enabled, server):
    """Write ProxyEnable / ProxyServer under the current user's key."""
    try:
        key = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, INTERNET_SETTINGS_KEY,
                              0, _winreg.KEY_WRITE)
    except EnvironmentError as e:
        print("ERROR: " + str(e.args))
        return
    try:
        _winreg.SetValueEx(key, "ProxyEnable", 0, _winreg.REG_DWORD, enabled)
        _winreg.SetValueEx(key, "ProxyServer", 0, _winreg.REG_SZ, server)
    except EnvironmentError as e:
        print("ERROR: " + str(e.args))
    finally:
        # Always release the registry handle.
        _winreg.CloseKey(key)


# Enable the proxy
def enableProxy(IP, Port):
    """Turn the proxy on and point it at ``IP:Port``.

    Args:
        IP: proxy host, without surrounding quotes.
        Port: proxy port (string or int).
    """
    _writeProxySettings(1, IP + ":" + str(Port))


# Disable the proxy
def disableProxy():
    """Turn the proxy off and clear the ProxyServer value."""
    _writeProxySettings(0, "")


def main():
    """Scrape, validate, then cycle through the working proxies every 5 s."""
    proxy = getProxyIp()
    # The return value must be kept: it is the list the loop iterates over.
    validProxys = testProxys(proxy)
    disableProxy()
    if not validProxys:
        # Without this guard the loop below would spin forever doing nothing.
        print("No usable proxy found.")
        return
    try:
        while True:
            for entry in validProxys:
                host, port = entry.split(':', 1)
                print("This is IP " + host + ':' + port)
                # The host is written as-is; wrapping it in quotes produces
                # an invalid ProxyServer value.
                enableProxy(host, port)
                time.sleep(5)
    except KeyboardInterrupt:
        pass
    except Exception as e:
        print("ERROR: " + str(e.args))
    finally:
        # Do not leave the machine pointing at a (possibly dead) proxy.
        disableProxy()


if __name__ == '__main__':
    main()
然后我兴致勃勃地去实验了一番。嗯,换了一次,然后,欸,网页咋打不开了呢?
然后我又搜索另一篇文章看到:其实真正的注册表是
HKCU\Software\Microsoft\Windows\CurrentVersion\Internet Settings\Connections
下面的 DefaultConnectionSettings 值,其中以16进制格式保存着代理设置。我也开始怀疑,写第一种方法的人是不是只实验了一次。然后我又看了这篇文章,它主要是通过命令行的形式去指定一个IP与端口。我就想,要是可以自动切换该多好,IP就从网上爬。其实我后来用到了scylla这个python库,它本身就提供了一些代理可以使用。
然后我安装了scylla,先用python把它提供的IP与端口取下来,再逐个进行验证,最后遍历可用的代理,每隔一段时间切换一次,这样就基本实现了我的想法。在经过了一番纠结之后,我完成了这个小工具,代码如下:
#!/usr/bin/python
# -*- coding:utf-8 -*-
"""Rotate the Windows proxy by importing a DefaultConnectionSettings .reg file.

Python 2 script (uses ``urllib2``). Proxies are taken from a scylla instance
(or scraped from a free listing), validated, and then applied one after the
other by generating a ``.reg`` file and running ``reg import`` on it.
"""
import os
import re
import subprocess
import sys
import time
import urllib2

import requests
from bs4 import BeautifulSoup

# The request header can be customised here.
User_Agent = 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0'
header = {}
header['User-Agent'] = User_Agent

# Default scylla endpoint used by getProxyIp().
SCYLLA_API_URL = 'http://192.168.110.128:8899/api/v1/proxies'


# Proxy source: free public listing.
def getfreeProxyIp():
    """Scrape "ip:port" candidates from the xicidaili listing pages.

    Returns:
        A list of "ip:port" strings. Pages that cannot be downloaded or
        parsed, and table rows without the expected cells, are skipped.
    """
    proxy = []
    for page in range(10):
        url = 'http://www.xicidaili.com/nn/' + str(page)
        try:
            req = urllib2.Request(url, headers=header)
            res = urllib2.urlopen(req, timeout=10).read()
            rows = BeautifulSoup(res, "html.parser").findAll('tr')
        except Exception:
            # Best effort per page; KeyboardInterrupt is not swallowed.
            continue
        # The first <tr> is the table header.
        for row in rows[1:]:
            tds = row.findAll("td")
            try:
                proxy.append(tds[1].contents[0] + ":" + tds[2].contents[0])
            except (IndexError, TypeError):
                continue
    return proxy


# Proxy source: scylla.
def getProxyIp(url=SCYLLA_API_URL):
    """Fetch "ip:port" candidates from a scylla proxy API.

    Args:
        url: endpoint returning JSON with a "proxies" list whose items have
            "ip" and "port" keys.

    Returns:
        A list of "ip:port" strings; whatever was collected before an error
        is still returned.
    """
    proxy = []
    try:
        payload = requests.get(url, timeout=10).json()
        for item in payload["proxies"]:
            proxy.append(str(item['ip']) + ":" + str(item['port']))
    except (requests.RequestException, ValueError, KeyError, TypeError) as e:
        # Network failure, invalid JSON or unexpected schema.
        print('---Failed to read proxies from %s: %s---' % (url, e))
    return proxy


# Validation: keep only the proxies that actually work.
def testProxys(proxys):
    """Return the subset of ``proxys`` that can fetch the test URL.

    Args:
        proxys: iterable of "ip:port" strings.

    Returns:
        A list with the proxies whose response matched the expected pattern.
    """
    validProxys = []
    Url = "http://ip.chinaz.com/getip.aspx"
    for proxy in proxys:
        try:
            proxy_handler = urllib2.ProxyHandler({'http': proxy, 'https': proxy})
            # Use the opener directly so global urllib2 state is untouched.
            opener = urllib2.build_opener(proxy_handler)
            conn = opener.open(Url, timeout=5)
            try:
                response = conn.read()
            finally:
                conn.close()
        except Exception:
            continue
        # Filter condition specific to the test site's response format.
        if re.findall('{ip:.*?,address:..*?}', response):
            validProxys.append(proxy)
            # Two placeholders need two values.
            print("%s\t%s" % (proxy, response))
    return validProxys


# Apply proxy settings.
def regIESettings(op, noLocal=False, ip='', pac=''):
    """Generate a .reg file for DefaultConnectionSettings and import it.

    DefaultConnectionSettings is a binary registry value; this function builds
    its comma-separated hex representation, writes it to
    ``DefaultConnectionSettings.reg`` in the current working directory and
    runs ``reg import`` on that file.

    Args:
        op: one of 'On', 'Off', 'ProxyOnly', 'PacOnly', 'ProxyAndPac', 'D',
            'DIP', 'DS'. A falsy value makes the call a no-op.
        noLocal: when true, a ``<local>`` bypass entry is embedded.
        ip: "a.b.c.d" or "a.b.c.d:port"; may be empty, but a non-empty value
            must look like an IPv4 address when ``op`` contains 'Proxy'.
        pac: optional PAC string appended to the value.

    NOTE(review): the byte layout below is reproduced unchanged from the
    original code; the ``noLocal`` and ``pac`` encodings (e.g. the fixed
    ``21`` length byte) have not been verified -- confirm before relying on
    anything other than 'Off' / 'ProxyOnly'.
    """
    if not op:
        return
    # In proxy mode an empty address is allowed, a malformed one is not.
    if 'Proxy' in op and not ip == '':
        if not re.findall(r'([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})\s*:{0,1}\s*([0-9]{1,5}){0,1}', ip):
            print('---Unexpected IP Address:%s---' % ip)
            return
    options = {'On': '0F', 'Off': '01', 'ProxyOnly': '03', 'PacOnly': '05',
               'ProxyAndPac': '07', 'D': '09', 'DIP': '0B', 'DS': '0D'}
    if op == 'Off':
        reg_value = '46,00,00,00,00,00,00,00,01'
    else:
        switcher = options.get(op)
        if not switcher:
            print('\n---Unexpected Option. Please check the value after [-o]---\n')
            return
        skipLocal = ('07,00,00,00,%s' % __toHex('<local>')) if noLocal else '00'
        reg_value = '46,00,00,00,00,00,00,00,%(switcher)s,00,00,00,%(ipLen)s,00,00,00,%(ip)s00,00,00,%(skipLocal)s,21,00,00,00%(pac)s' % {
            'switcher': switcher,
            'ipLen': __toHex(len(ip)),
            'ip': (__toHex(ip) + ',') if ip else '',
            'skipLocal': skipLocal,
            'pac': (',' + __toHex(pac)) if pac else '',
        }
    # Backslashes are escaped explicitly; the resulting text is unchanged.
    settings = ('Windows Registry Editor Version 5.00\n'
                '[HKEY_CURRENT_USER\\Software\\Microsoft\\Windows\\CurrentVersion\\Internet Settings\\Connections]\n'
                '"DefaultConnectionSettings"=hex:%s' % reg_value)
    # === Write the .reg file and import it into the registry ===
    filePath = os.path.join(os.getcwd(), 'DefaultConnectionSettings.reg')
    with open(filePath, 'w') as f:
        f.write(settings)
    # Argument list instead of a shell string: no quoting issues with the
    # path, and the pipes are closed by communicate().
    proc = subprocess.Popen(['reg', 'import', filePath],
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    proc.communicate()
    if proc.returncode != 0:
        print('---reg import failed with exit code %s---' % proc.returncode)
    return


def __toHex(obj):
    """Convert a str or int to the comma-separated hex used in .reg files.

    Args:
        obj: a ``str`` (each character becomes one two-digit hex byte) or an
            ``int`` (rendered as hex, zero-padded to at least two digits).
            ``''`` yields ``''``; ``0``, ``'0'`` and ``'00'`` yield ``'00'``.

    Returns:
        The hex text, e.g. ``'07'`` for 7 or ``'31,2e'`` for ``'1.'``.

    Raises:
        TypeError: if ``obj`` is neither ``str`` nor ``int``.
    """
    if obj == '':
        return ''
    elif obj == 0 or obj == '0' or obj == '00':
        return '00'
    if isinstance(obj, str):
        # Zero-pad every byte so characters below 0x10 stay two digits wide.
        return ','.join('%02x' % ord(ch) for ch in obj)
    elif isinstance(obj, int):
        # Single-digit values are padded: 7 -> 07, 14 -> 0e.
        return '%02x' % obj
    raise TypeError('__toHex expects str or int, got %r' % type(obj))


def main():
    """Reset the proxy, collect working proxies and switch every 20 s."""
    regIESettings(op='Off', ip='', pac='', noLocal=False)
    proxy = getProxyIp()
    validProxys = testProxys(proxy)
    if not validProxys:
        # Without this guard the loop below would spin forever doing nothing.
        print('---No usable proxy found---')
        return
    print('---Start agent---')
    try:
        while True:
            for ip in validProxys:
                try:
                    print('Being used' + ip)
                    regIESettings(op='ProxyOnly', ip=ip, pac='', noLocal=False)
                except Exception:
                    # Exception, not bare except: Ctrl-C must still stop us.
                    print('GG!')
                # Sleep outside the try so repeated failures cannot busy-loop.
                time.sleep(20)
    except KeyboardInterrupt:
        pass
    finally:
        # Do not leave the machine pointing at a (possibly dead) proxy.
        regIESettings(op='Off', ip='', pac='', noLocal=False)


if __name__ == '__main__':
    main()
代码大概就是这样子的。最后我把程序打包了一下,实现效果大概是这样子的:
原创文章,作者:Y4er,未经授权禁止转载!如若转载,请联系作者:Y4er