前两天我在上班的时候一直被网站拦截,突然冒出一个想法:如果可以不停地更换代理,会不会好一点呢?说着我就打开神器百度,开始搜索。

在经过一番搜索之后,我发现网上大部分文章都指向了一个地址:

HKEY_CURRENT_USER\Software\Microsoft\Windows\CurrentVersion\Internet Settings,应该是通过修改IE注册表的方式来实现的。相比平常在脚本中设置代理,我更喜欢这种修改注册表的方式。经过一番复制粘贴,我完成了最初版本的代码:

 

#!/usr/bin/python

# -*- coding: utf-8 -*-

import io, sys, time, re, os,urllib2

from bs4 import BeautifulSoup

import _winreg

import urllib

import socket




# Headers sent with the proxy-list requests: a desktop Firefox User-Agent.
User_Agent = 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0'
header = {'User-Agent': User_Agent}




# Fetch candidate proxy addresses
def getProxyIp():
    """Scrape candidate proxies from the xicidaili listing pages.

    Requests the pages ``/nn/0`` through ``/nn/9`` and, for every table row
    after the first, joins the first child of the second and third ``<td>``
    cells as ``ip:port``.  A page that cannot be downloaded or parsed is
    skipped; a row without the expected cells is skipped on its own, so one
    malformed row no longer discards the rest of its page.

    Returns:
        A list of ``"ip:port"`` strings (possibly empty).
    """
    proxy = []
    for page in range(10):
        url = 'http://www.xicidaili.com/nn/' + str(page)
        try:
            req = urllib2.Request(url, headers=header)
            # Bounded wait so a single stalled page cannot hang the whole run.
            res = urllib2.urlopen(req, timeout=10).read()
            rows = BeautifulSoup(res, "html.parser").findAll('tr')
        except Exception:
            # Best effort: give up on this page only.  Unlike a bare
            # ``except`` this still lets Ctrl-C / SystemExit stop the script.
            continue
        # The first row is skipped (presumably the table header).
        for row in rows[1:]:
            tds = row.findAll("td")
            try:
                proxy.append(tds[1].contents[0] + ":" + tds[2].contents[0])
            except (IndexError, TypeError):
                # Missing cell, empty cell, or a cell whose first child is
                # not plain text: drop just this row.
                continue
    return proxy







#验证IP

def testProxys(proxys):

    """ Test the proxys. """

    validProxys = []

    Url = "http://ip.chinaz.com/getip.aspx"

    for proxy in proxys:

        try:

            # set proxy

            proxy_handler = urllib2.ProxyHandler({'http':proxy, 'https':proxy})

            opener = urllib2.build_opener(proxy_handler)

            urllib2.install_opener(opener)

            # request website

            response = urllib2.urlopen(Url, timeout=5).read()




            # set filtration condition according website

            if re.findall('{ip:.*?,address:..*?}', response) != []: # remove invalid proxy

                validProxys.append(proxy)

                print "%s\t%s" % (proxy, response)

        except Exception as e:

            print "%s\t%s" % (proxy, "invalid")

            continue




    return validProxys

# Enable the proxy
def enableProxy(IP, Port):
    """Turn on the per-user Internet Settings proxy and point it at IP:Port.

    Writes ``ProxyEnable = 1`` and ``ProxyServer = "<IP>:<Port>"`` under
    ``HKEY_CURRENT_USER\\Software\\Microsoft\\Windows\\CurrentVersion\\Internet Settings``.
    Registry errors are reported with a printed ``ERROR:`` line instead of
    being raised.

    Args:
        IP: proxy host as a string.
        Port: proxy port; converted with ``str()``.
    """
    proxy = IP + ":" + str(Port)
    # Raw string: the backslashes are path separators, not escape sequences.
    xpath = r"Software\Microsoft\Windows\CurrentVersion\Internet Settings"
    key = None
    try:
        key = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, xpath, 0, _winreg.KEY_WRITE)
        _winreg.SetValueEx(key, "ProxyEnable", 0, _winreg.REG_DWORD, 1)
        _winreg.SetValueEx(key, "ProxyServer", 0, _winreg.REG_SZ, proxy)
    except Exception as e:
        print("ERROR: " + str(e.args))
    finally:
        # Release the registry handle explicitly instead of relying on
        # garbage collection.
        if key is not None:
            _winreg.CloseKey(key)

# Disable the proxy
def disableProxy():
    """Turn off the per-user Internet Settings proxy and clear its address.

    Writes ``ProxyEnable = 0`` and an empty ``ProxyServer`` under
    ``HKEY_CURRENT_USER\\Software\\Microsoft\\Windows\\CurrentVersion\\Internet Settings``.
    Registry errors are reported with a printed ``ERROR:`` line instead of
    being raised.
    """
    # Raw string: the backslashes are path separators, not escape sequences.
    xpath = r"Software\Microsoft\Windows\CurrentVersion\Internet Settings"
    key = None
    try:
        key = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, xpath, 0, _winreg.KEY_WRITE)
        _winreg.SetValueEx(key, "ProxyEnable", 0, _winreg.REG_DWORD, 0)
        _winreg.SetValueEx(key, "ProxyServer", 0, _winreg.REG_SZ, "")
    except Exception as e:
        print("ERROR: " + str(e.args))
    finally:
        # Release the registry handle explicitly instead of relying on
        # garbage collection.
        if key is not None:
            _winreg.CloseKey(key)




def main():
    """Collect proxies, keep the working ones and rotate through them.

    After disabling any current proxy, the validated proxies are applied one
    after another with ``enableProxy``, 5 seconds apart, in an endless loop.
    Any exception inside the loop is printed as ``ERROR: ...`` and ends the
    rotation.
    """
    proxy = getProxyIp()
    # Keep the result: the loop below previously referenced an undefined
    # name because the return value was thrown away.
    validProxys = testProxys(proxy)
    if not validProxys:
        # Nothing to rotate through; the endless loop would only spin.
        print("ERROR: no usable proxy found")
        return
    try:
        disableProxy()
        while True:
            for entry in validProxys:
                iplist = entry.split(':')
                # Pass the bare host: wrapping it in quote characters would
                # store the quotes in the ProxyServer registry value.
                ip = iplist[0]
                port = iplist[1]
                print("This is IP  " + ip + ':' + port)
                enableProxy(ip, port)
                time.sleep(5)
    except Exception as e:
        print("ERROR: " + str(e.args))




# Start the rotation only when run as a script, not when imported.
if __name__ == "__main__":
    main()

然后我兴致勃勃地去实验了一番,嗯,换了一次,然后,欸,网页咋打不开了呢?

然后我又搜索到另一篇文章,里面提到:其实真正起作用的注册表位置是

HKCU\Software\Microsoft\Windows\CurrentVersion\Internet Settings\Connections

下面的 DefaultConnectionSettings 项,代理设置就以16进制格式保存在这个项里。我也开始怀疑写第一种方法的人是不是只实验了一次。然后我又看了这篇文章,主要是通过命令行的形式去指定一个IP与端口。我就想到,要是可以自动切换该多好,IP我就从网上爬。其实我后来用到了scylla这个python库,它里面提供了一些可以使用的代理。

然后我安装了scylla,先用python把它提供的IP与端口爬下来,然后进行验证,最后遍历可用的代理,每隔一段时间设置一次代理,这样就基本实现了我的想法。在经过了一番纠结之后,我完成了这个小工具,代码如下:

 

#!/usr/bin/python

# -*- coding:utf-8 -*-

import os, sys, re,requests,urllib2,time

# The request headers can be customised here.
User_Agent = 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0'
header = {'User-Agent': User_Agent}




# Proxy source: the free public list on the web (quality is poor)
def getfreeProxyIp():
    """Scrape candidate proxies from the xicidaili listing pages.

    Requests the pages ``/nn/0`` through ``/nn/9`` and, for every table row
    after the first, joins the first child of the second and third ``<td>``
    cells as ``ip:port``.  A page that cannot be downloaded or parsed is
    skipped; a row without the expected cells is skipped on its own.

    Returns:
        A list of ``"ip:port"`` strings (possibly empty).

    Raises:
        ImportError: if ``bs4`` is not installed.
    """
    # Imported here because this script's import line does not pull in
    # BeautifulSoup; without it every page failed with a swallowed NameError.
    from bs4 import BeautifulSoup

    proxy = []
    for page in range(10):
        url = 'http://www.xicidaili.com/nn/' + str(page)
        try:
            req = urllib2.Request(url, headers=header)
            # Bounded wait so a single stalled page cannot hang the whole run.
            res = urllib2.urlopen(req, timeout=10).read()
            rows = BeautifulSoup(res, "html.parser").findAll('tr')
        except Exception:
            # Best effort: give up on this page only.  Unlike a bare
            # ``except`` this still lets Ctrl-C / SystemExit stop the script.
            continue
        # The first row is skipped (presumably the table header).
        for row in rows[1:]:
            tds = row.findAll("td")
            try:
                proxy.append(tds[1].contents[0] + ":" + tds[2].contents[0])
            except (IndexError, TypeError):
                # Missing cell, empty cell, or a cell whose first child is
                # not plain text: drop just this row.
                continue
    return proxy




# Proxy source: a Scylla instance (the one that actually yielded usable proxies)
def getProxyIp(url='http://192.168.110.128:8899/api/v1/proxies', timeout=10):
    """Fetch candidate proxies from a Scylla proxy-pool HTTP API.

    Args:
        url: endpoint returning JSON with a ``"proxies"`` list whose items
            carry ``"ip"`` and ``"port"``.  Defaults to the address that was
            previously hard-coded.
        timeout: seconds to wait for the HTTP response; without it a silent
            server would block the script indefinitely.

    Returns:
        A list of ``"ip:port"`` strings.  If the payload stops matching the
        expected shape part-way through, the entries read so far are
        returned.

    Raises:
        Whatever ``requests.get`` / ``.json()`` raise when the request fails
        or the body is not JSON (as before, these are not caught here).
    """
    r = requests.get(url, timeout=timeout).json()
    proxy = []
    try:
        for item in r["proxies"]:
            proxy.append(str(item['ip']) + ":" + str(item['port']))
    except (KeyError, TypeError, ValueError):
        # Unexpected payload shape: keep what was collected so far.
        pass
    return proxy




# 定义验证函数,提取可用IP代理

def testProxys(proxys):

    """ Test the proxys. """

    validProxys = []

    Url = "http://ip.chinaz.com/getip.aspx"

    for proxy in proxys:

        try:

            # set proxy

            proxy_handler = urllib2.ProxyHandler({'http':proxy, 'https':proxy})

            opener = urllib2.build_opener(proxy_handler)

            urllib2.install_opener(opener)

            # request website

            response = urllib2.urlopen(Url, timeout=5).read()




            # set filtration condition according website

            if re.findall('{ip:.*?,address:..*?}', response) != []: # remove invalid proxy

                validProxys.append(proxy)

                print "%s\t%s" % (proxy)

        except Exception as e:

            continue




    return validProxys




# Apply the system proxy settings
def regIESettings(op, noLocal=False, ip='', pac=''):
    """Write a .reg file for ``DefaultConnectionSettings`` and import it.

    Builds the comma-separated hex payload for the binary
    ``DefaultConnectionSettings`` value of the per-user
    ``Internet Settings/Connections`` registry key, saves it as
    ``DefaultConnectionSettings.reg`` in the current working directory and
    runs ``reg import`` on that file.  The payload layout is unchanged from
    the previous implementation.

    Args:
        op: mode name, one of ``On``, ``Off``, ``ProxyOnly``, ``PacOnly``,
            ``ProxyAndPac``, ``D``, ``DIP``, ``DS``.  A falsy value makes the
            call a no-op; an unknown name prints a message and returns.
        noLocal: when true, a ``<local>`` entry is written into the payload
            (presumably "bypass the proxy for local addresses" - TODO confirm).
        ip: proxy address, ``a.b.c.d`` optionally followed by ``:port``.
            May be empty; a non-empty value without a dotted quad is rejected
            in the proxy modes.
        pac: text appended, hex-encoded, to the end of the payload; may be
            empty.

    Returns:
        None in every case; problems are reported with a printed message.
    """
    if not op:
        return

    # In a proxy mode a non-empty address must contain at least a dotted quad.
    # Raw string so the regex backslashes are not treated as string escapes.
    ip_pattern = r'([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})\s*:{0,1}\s*([0-9]{1,5}){0,1}'
    if 'Proxy' in op and not ip == '':
        if 1 > len(re.findall(ip_pattern, ip)):
            print('---Unexpected IP Address:%s---' % ip)
            return

    options = {'On': '0F', 'Off': '01', 'ProxyOnly': '03', 'PacOnly': '05',
               'ProxyAndPac': '07', 'D': '09', 'DIP': '0B', 'DS': '0D'}
    if op == 'Off':
        reg_value = '46,00,00,00,00,00,00,00,01'
    else:
        switcher = options.get(op)
        if not switcher:
            print('\n---Unexpected Option. Please check the value after [-o]---\n')
            return
        if noLocal:
            skipLocal = '07,00,00,00,%s' % __toHex('<local>')
        else:
            skipLocal = '00'
        fields = {
            'switcher': switcher,
            'ipLen': __toHex(len(ip)),
            'ip': (__toHex(ip) + ',') if ip else '',
            'infoLen': __toHex(len('<local>')),
            'skipLocal': skipLocal,
            'pac': (',' + __toHex(pac)) if pac else '',
        }
        reg_value = '46,00,00,00,00,00,00,00,%(switcher)s,00,00,00,%(ipLen)s,00,00,00,%(ip)s00,00,00,%(skipLocal)s,21,00,00,00%(pac)s' % fields

    # Backslashes are escaped explicitly; the resulting text is identical.
    settings = (
        'Windows Registry Editor Version 5.00\n'
        '[HKEY_CURRENT_USER\\Software\\Microsoft\\Windows\\CurrentVersion\\Internet Settings\\Connections]\n'
        '"DefaultConnectionSettings"=hex:%s'
    ) % reg_value

    # === Write the .reg file and import it into the registry ===
    filePath = '%s\\DefaultConnectionSettings.reg' % os.getcwd()
    with open(filePath, 'w') as f:
        f.write(settings)

    cmd = 'reg import "%s"' % filePath
    result = os.popen(cmd)
    try:
        output = result.readlines()
    finally:
        # Close the pipe explicitly instead of relying on garbage collection.
        result.close()
    if len(output) < 2:
        print('')
    return




def __toHex(obj):
    """Encode text or a small integer as comma-separated two-digit hex.

    Args:
        obj: a string (each character becomes one hex field) or an integer
            (rendered as a single field).

    Returns:
        * ``''`` for the empty string;
        * ``'00'`` for ``0`` and, as a retained special case, for the strings
          ``'0'`` and ``'00'``;
        * for other text, the lowercase hex code of every character joined
          with commas, each zero-padded to two digits, e.g. ``'<'`` ->
          ``'3c'`` and a tab -> ``'09'``;
        * for other integers, lowercase hex zero-padded to two digits, e.g.
          ``7`` -> ``'07'``;
        * ``None`` for any other type.
    """
    if obj == '':
        return ''
    if obj == 0 or obj == '0' or obj == '00':
        return '00'
    try:
        # Python 2: unicode text (e.g. scraped or JSON-decoded values) must be
        # encoded as well instead of falling through and returning None.
        text_types = (str, unicode)
    except NameError:
        text_types = (str,)
    if isinstance(obj, text_types):
        # %02x pads single-digit codes, matching the integer branch.
        return ','.join('%02x' % ord(ch) for ch in obj)
    if isinstance(obj, int):
        return '%02x' % obj
    return None




def main():
    """Switch the proxy off, collect working proxies and rotate through them.

    The validated proxies are applied one after another with
    ``regIESettings(op='ProxyOnly', ...)``, 20 seconds apart, in an endless
    loop.  A failure while applying one proxy prints ``GG!`` and the loop
    moves on to the next one.
    """
    regIESettings(op='Off', ip='', pac='', noLocal=False)
    proxy = getProxyIp()
    validProxys = testProxys(proxy)
    if not validProxys:
        # Nothing to rotate through; the endless loop would only spin.
        print('---No usable proxy found---')
        return
    print('---Start agent---')
    while True:
        for ip in validProxys:
            try:
                print('Being used' + ip)
                regIESettings(op='ProxyOnly', ip=ip, pac='', noLocal=False)
                time.sleep(20)
            except Exception:
                # ``except`` now lines up with its ``try`` (the old layout was
                # an IndentationError) and no longer traps Ctrl-C.
                print('GG!')




# Start the rotation only when run as a script, not when imported.
if __name__ == "__main__":
    main()


代码大概就是这样子的,最后我把程序打包了一下,实现效果大概是这样子的: