文章目录
- 使用Python爬取xici代理的高匿代理ip
- 使用Python爬取快代理的高匿代理ip
有时候需要用到一些代理 IP。常见的免费代理来源有西刺代理（xici）和快代理，下面是爬取它们的代码。
使用requests进行爬取
使用Python爬取xici代理的高匿代理ip
import requests
from bs4 import BeautifulSoup
import random


class get_xici_ip():
    """Scrape a proxy listing page from xicidaili.com and pick one proxy.

    Typical use is ``get_xici_ip().get_one()``, which returns a mapping in
    the shape accepted by the ``proxies`` argument of ``requests``.
    """

    # Pool of User-Agent strings served by random_agent(). Each entry is
    # separated by a comma: adjacent string literals without one are
    # concatenated by Python into a single, invalid User-Agent.
    USER_AGENTS = (
        "Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_2 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5",
        "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_2 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5",
        "MQQBrowser/25 (Linux; U; 2.3.3; zh-cn; HTC Desire S Build/GRI40;480*800)",
        "Mozilla/5.0 (Linux; U; Android 2.3.3; zh-cn; HTC_DesireS_S510e Build/GRI40) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
        "Mozilla/5.0 (SymbianOS/9.3; U; Series60/3.2 NokiaE75-1 /110.48.125 Profile/MIDP-2.1 Configuration/CLDC-1.1 ) AppleWebKit/413 (KHTML, like Gecko) Safari/413",
        'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
        'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11',
    )

    def random_agent(self):
        """Return one User-Agent string chosen at random from USER_AGENTS."""
        return random.choice(self.USER_AGENTS)

    def get_ip_list(self, url, headers, timeout=10):
        """Fetch ``url`` and return the proxies found in its table rows.

        Args:
            url: Address of the listing page to download.
            headers: HTTP headers sent with the request.
            timeout: Seconds handed to ``requests.get`` so that an
                unresponsive server cannot block the caller indefinitely.

        Returns:
            A list of ``'ip:port'`` strings, built from the second and
            third ``<td>`` of each table row.
        """
        web_data = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(web_data.text, 'lxml')
        ip_list = []
        # The first <tr> is skipped, as it is treated as the header row.
        for row in soup.find_all('tr')[1:]:
            tds = row.find_all('td')
            # Rows without enough cells (e.g. separators, nested header
            # rows) carry no proxy and would otherwise raise IndexError.
            if len(tds) < 3:
                continue
            ip_list.append(tds[1].text.strip() + ':' + tds[2].text.strip())
        return ip_list

    def get_random_ip(self, ip_list):
        """Pick one entry of ``ip_list`` and wrap it as a proxies mapping.

        Args:
            ip_list: Sequence of ``'ip:port'`` strings.

        Returns:
            ``{'http': 'http://ip:port'}`` for the chosen entry.

        Raises:
            IndexError: If ``ip_list`` is empty.
        """
        if not ip_list:
            raise IndexError('no proxies available: ip_list is empty')
        return {'http': 'http://' + random.choice(ip_list)}

    def get_one(self):
        """Download a random listing page (1-10), print it, return one proxy.

        Returns:
            A proxies mapping as produced by ``get_random_ip``.
        """
        url = 'http://www.xicidaili.com/nn/%s' % random.randint(1, 10)
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'
        }
        ip_list = self.get_ip_list(url, headers=headers)
        print(ip_list)
        return self.get_random_ip(ip_list)
调用class
# Build the scraper, fetch one random proxy mapping and show it.
xici_scraper = get_xici_ip()
xici_proxies = xici_scraper.get_one()
print(xici_proxies)
输出结果
['27.25.196.242:9999', '117.91.232.146:9999', '111.177.178.107:9999', '111.177.188.158:9999', '111.177.179.103:9999', '111.177.181.81:9999', '183.148.133.158:9999', '110.52.235.25:9999', '111.177.187.63:9999', '111.177.172.18:9999', '111.177.178.175:9999', '116.209.54.63:9999', '183.148.140.20:9999', '116.209.52.115:9999', '117.90.2.139:9999', '111.177.177.212:9999', '119.102.189.134:9999', '119.102.188.140:9999', '119.102.188.156:9999', '121.61.2.196:9999', '49.86.180.90:9999', '219.139.141.112:9999', '111.177.189.26:9999', '111.177.191.179:9999', '122.192.174.244:9999', '111.177.167.67:9999', '125.123.139.143:9999', '125.126.210.203:9999', '125.123.140.229:9999', '171.41.84.191:9999', '111.177.185.8:9999', '110.52.235.27:9999', '123.163.117.72:9999', '111.181.35.17:9999', '113.121.146.190:9999', '111.176.29.245:9999', '116.209.58.5:9999', '111.177.175.161:9999', '113.122.169.65:9999', '121.61.2.8:808', '121.61.0.140:9999', '111.176.23.161:9999', '116.209.54.236:9999', '171.41.85.124:9999', '125.126.209.156:9999', '180.119.68.211:9999', '111.177.191.214:9999', '58.50.1.139:9999', '59.62.166.108:9999', '115.151.2.63:9999', '111.177.179.41:9999', '171.41.84.200:9999', '115.151.5.40:53128', '59.62.164.163:9999', '121.61.2.128:9999', '116.209.54.117:9999', '111.177.161.26:9999', '125.123.140.246:9999', '111.181.35.55:9999', '125.123.143.70:9999', '171.41.85.163:9999', '112.85.130.88:9999', '121.61.0.165:9999', '171.80.136.10:9999', '111.177.188.81:9999', '115.151.2.101:9999', '171.41.85.201:9999', '113.121.145.6:9999', '121.61.0.98:9999', '171.41.86.14:9999', '111.177.172.77:9999', '111.177.171.222:9999', '110.52.235.11:9999', '183.148.145.122:9999', '110.52.235.206:9999', '111.177.189.246:9999', '110.52.235.237:9999', '58.50.3.137:9999', '117.90.137.148:9999', '116.209.58.116:9999', '116.209.53.154:9999', '110.52.235.123:9999', '175.165.146.223:1133', '115.151.3.7:9999', '116.209.54.220:9999', '111.79.198.71:9999', '115.151.2.189:9999', '116.209.54.48:9999', 
'116.209.54.235:9999', '116.7.176.29:8118', '59.62.165.245:9999', '115.151.7.159:9999', '222.189.190.47:9999', '183.15.121.77:3128', '111.177.170.247:9999', '111.181.61.163:9999', '112.85.170.173:9999', '115.151.2.37:9999', '116.209.56.92:9999', '121.61.2.242:9999']
{'http': 'http://183.148.140.20:9999'}
使用Python爬取快代理的高匿代理ip
随机获取其中一个IP地址
import requests
from bs4 import BeautifulSoup
import random


class get_kuaidaili_ip():
    """Scrape a proxy listing page from kuaidaili.com and pick one proxy.

    Typical use is ``get_kuaidaili_ip().get_one()``, which returns a mapping
    in the shape accepted by the ``proxies`` argument of ``requests``.
    """

    # Pool of User-Agent strings served by random_agent(). Each entry is
    # separated by a comma: adjacent string literals without one are
    # concatenated by Python into a single, invalid User-Agent.
    USER_AGENTS = (
        "Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_2 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5",
        "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_2 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5",
        "MQQBrowser/25 (Linux; U; 2.3.3; zh-cn; HTC Desire S Build/GRI40;480*800)",
        "Mozilla/5.0 (Linux; U; Android 2.3.3; zh-cn; HTC_DesireS_S510e Build/GRI40) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
        "Mozilla/5.0 (SymbianOS/9.3; U; Series60/3.2 NokiaE75-1 /110.48.125 Profile/MIDP-2.1 Configuration/CLDC-1.1 ) AppleWebKit/413 (KHTML, like Gecko) Safari/413",
        'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
        'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11',
    )

    def random_agent(self):
        """Return one User-Agent string chosen at random from USER_AGENTS."""
        return random.choice(self.USER_AGENTS)

    def get_ip_list(self, url, headers, timeout=10):
        """Fetch ``url`` and return the proxies found in its table rows.

        Args:
            url: Address of the listing page to download.
            headers: HTTP headers sent with the request.
            timeout: Seconds handed to ``requests.get`` so that an
                unresponsive server cannot block the caller indefinitely.

        Returns:
            A list of ``'ip:port'`` strings, built from the first and
            second ``<td>`` of each table row.
        """
        web_data = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(web_data.text, 'lxml')
        ip_list = []
        # The first <tr> is skipped, as it is treated as the header row.
        for row in soup.find_all('tr')[1:]:
            tds = row.find_all('td')
            # Rows without enough cells carry no proxy and would otherwise
            # raise IndexError.
            if len(tds) < 2:
                continue
            ip_list.append(tds[0].text.strip() + ':' + tds[1].text.strip())
        return ip_list

    def get_random_ip(self, ip_list):
        """Pick one entry of ``ip_list`` and wrap it as a proxies mapping.

        Args:
            ip_list: Sequence of ``'ip:port'`` strings.

        Returns:
            ``{'http': 'http://ip:port'}`` for the chosen entry.

        Raises:
            IndexError: If ``ip_list`` is empty.
        """
        if not ip_list:
            raise IndexError('no proxies available: ip_list is empty')
        return {'http': 'http://' + random.choice(ip_list)}

    def get_one(self):
        """Download a random listing page (1-10), print it, return one proxy.

        Returns:
            A proxies mapping as produced by ``get_random_ip``.
        """
        url = 'https://www.kuaidaili.com/free/inha/%s/' % random.randint(1, 10)
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'
        }
        ip_list = self.get_ip_list(url, headers=headers)
        print(ip_list)
        return self.get_random_ip(ip_list)
调用class
# Build the scraper, fetch one random proxy mapping and show it.
kuaidaili_scraper = get_kuaidaili_ip()
kuaidaili_proxies = kuaidaili_scraper.get_one()
print(kuaidaili_proxies)
输出结果:
['121.61.27.120:9999', '163.204.242.44:9999', '115.151.5.138:9999', '121.239.127.128:9999', '1.192.245.72:9999', '121.232.194.13:9000', '125.123.136.50:9999', '60.13.42.8:9999', '111.177.169.209:9999', '183.147.30.228:9000', '110.52.235.238:9999', '180.118.128.86:9999', '49.89.85.101:9999', '163.204.245.36:9999', '115.151.7.86:9999']
{'http': 'http://111.177.169.209:9999'}