Redis 自身的 EXPIRE 只能作用于整个 key,无法让集合(Set)中的单个成员过期,不太适合本应用场景;故在每条 IP 代理后附加入库时间戳,每次插入时清理过期的 IP 代理
- 数据结构:
集合Set
- 插入元素:
r.sadd("ip_pools", f"http://8.8.8.8:8888?{int(time.time())}")
- 批量插入:
r.sadd("ip_pools", *proxy_list)
- 清理元素:
r.srem("ip_pools", "http://8.8.8.8:8888?1700000000")  # 成员必须与入库时的完整字符串(含当时的时间戳)一致
- 随机提取:
r.srandmember("ip_pools")
- 全部提取:
r.smembers("ip_pools")
代理池代码示例
import time
import requests
import redis
from threading import Timer
def fetch_ip_proxies():
    """Download the current proxy list from the provider API.

    Returns:
        list[str]: One entry per non-blank line of the response body, with
        surrounding whitespace removed. An empty list is returned when the
        request fails (including on timeout or a non-2xx status) or when the
        body contains no usable lines.
    """
    url = "http://gev.qydailiip.com/api"
    try:
        # Without a timeout requests waits indefinitely, so a stalled
        # provider would block the caller forever.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error occurred while fetching proxies: {e}")
        return []
    # splitlines() handles both "\n" and "\r\n"; blank lines are dropped so
    # callers never receive empty proxy entries.
    stripped = (line.strip() for line in response.text.splitlines())
    return [line for line in stripped if line]
def store_to_redis(ip_proxies, redis_host='192.168.1.252', redis_port=6379, password="redispwd", db=11):
    """Add proxies to the ``ip_pools`` Redis set, tagged with the insert time.

    Each proxy is stored as ``http://<proxy>?<unix-seconds>``.

    Args:
        ip_proxies: Iterable of proxy strings (presumably ``host:port`` --
            confirm against the provider's response format). Blank or
            whitespace-only entries are skipped.
        redis_host: Redis server host.
        redis_port: Redis server port.
        password: Redis password.
        db: Redis database index.

    NOTE(review): connection credentials are hard-coded as defaults in the
    signature; consider moving them to configuration.
    """
    r = redis.Redis(host=redis_host, port=redis_port, password=password, db=db)
    # One timestamp for the whole batch: every member is inserted by the
    # same SADD call below.
    stamp = int(time.time())
    cleaned = (ip.strip() for ip in ip_proxies)
    members = [f"http://{ip}?{stamp}" for ip in cleaned if ip]
    if members:
        # A single SADD with many members replaces one round trip per proxy.
        r.sadd("ip_pools", *members)
    # The count is the number of entries actually sent to Redis.
    print(f"[{time.ctime()[11:19]}] 代理池入库数量:", len(members))
def clean_redis(ttl=180, redis_host='192.168.1.252', redis_port=6379, password="redispwd", db=11):
    """Remove expired or malformed members from ``ip_pools`` and re-arm itself.

    A member is removed when the integer after its last ``?`` is more than
    ``ttl`` seconds in the past, or when that suffix cannot be decoded and
    parsed as an integer. After the cleanup a ``threading.Timer`` is started
    that calls this function again in 10 seconds with the same arguments.

    Args:
        ttl: Maximum age of a member in seconds.
        redis_host: Redis server host.
        redis_port: Redis server port.
        password: Redis password.
        db: Redis database index.
    """
    r = redis.Redis(host=redis_host, port=redis_port, password=password, db=db)
    ip_pools = r.smembers("ip_pools")
    now = time.time()
    stale = []
    for ip in ip_pools:
        try:
            stored_at = int(ip.decode().split("?")[-1])
        except (ValueError, UnicodeDecodeError):
            # No parsable timestamp: the member's age is unknown, so drop it.
            stale.append(ip)
            continue
        if now - stored_at > ttl:
            stale.append(ip)
    if stale:
        # One SREM for all stale members instead of one call per member.
        r.srem("ip_pools", *stale)
    print(f"[{time.ctime()[11:19]}] 代理池清理前数量:", len(ip_pools), "清理后数量:", r.scard("ip_pools"))
    # Forward the arguments so a caller-supplied ttl or connection survives
    # into subsequent runs instead of reverting to the defaults.
    Timer(10, clean_redis, args=(ttl, redis_host, redis_port, password, db)).start()
def main():
    """Run one refill cycle: fetch proxies and store them if any came back."""
    fetched = fetch_ip_proxies()
    if not fetched:
        # Nothing was fetched (or the fetch failed); leave the pool untouched.
        return
    store_to_redis(fetched)
if __name__ == "__main__":
    # Run the first cleanup pass before entering the refill loop.
    # NOTE(review): clean_redis appears to re-arm itself through a Timer, so
    # this single call is presumably what starts the periodic cleanup.
    clean_redis()
    # Refill the pool forever, pausing 10 seconds between cycles.
    while True:
        main()
        time.sleep(10)