import random
import time

import requests

# Proxy URLs that fetch() picks from at random on every attempt.
# NOTE(review): these entries look like placeholders (user:pass@ipN:port);
# replace them with real proxy URLs before use.
proxy_list = [
    "http://user:pass@ip1:port",
    "http://user:pass@ip2:port",
    "http://user:pass@ip3:port",
]

# User-Agent strings that get_headers() picks from at random.
user_agents = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)",
    "Mozilla/5.0 (X11; Linux x86_64)",
]


def get_proxy():
    """Return one proxy URL chosen at random from ``proxy_list``."""
    return random.choice(proxy_list)


def get_headers():
    """Return request headers with a randomly chosen ``User-Agent``."""
    return {"User-Agent": random.choice(user_agents)}


def fetch(url):
    """Fetch *url* through a random proxy, trying up to three times.

    Each attempt uses a freshly chosen proxy and User-Agent and a 10 second
    timeout. Between failed attempts the function sleeps for a random
    1-3 seconds.

    Args:
        url: The URL to request.

    Returns:
        The response body as text if an attempt returns HTTP 200,
        otherwise ``None`` once all attempts have failed.
    """
    max_attempts = 3
    for attempt in range(max_attempts):
        proxy = get_proxy()
        try:
            response = requests.get(
                url,
                proxies={"http": proxy, "https": proxy},
                headers=get_headers(),
                timeout=10,
            )
        except requests.RequestException:
            # Request-level failure (connection, proxy, timeout, bad URL):
            # treat it as a failed attempt and retry. Anything else, such as
            # KeyboardInterrupt or a programming error, is allowed to
            # propagate instead of being silently swallowed.
            pass
        else:
            if response.status_code == 200:
                return response.text
        # Back off before the next attempt; there is nothing to wait for
        # after the final one.
        if attempt < max_attempts - 1:
            time.sleep(random.uniform(1, 3))
    return None
import random
import time

import requests

# Proxy URLs that fetch() picks from at random on every attempt.
# NOTE(review): these entries look like placeholders (user:pass@ipN:port);
# replace them with real proxy URLs before use.
proxy_list = [
    "http://user:pass@ip1:port",
    "http://user:pass@ip2:port",
    "http://user:pass@ip3:port",
]

# User-Agent strings that get_headers() picks from at random.
user_agents = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)",
    "Mozilla/5.0 (X11; Linux x86_64)",
]


def get_proxy():
    """Return one proxy URL chosen at random from ``proxy_list``."""
    return random.choice(proxy_list)


def get_headers():
    """Return request headers with a randomly chosen ``User-Agent``."""
    return {"User-Agent": random.choice(user_agents)}


def fetch(url):
    """Fetch *url* through a random proxy, trying up to three times.

    Each attempt uses a freshly chosen proxy and User-Agent and a 10 second
    timeout. Between failed attempts the function sleeps for a random
    1-3 seconds.

    Args:
        url: The URL to request.

    Returns:
        The response body as text if an attempt returns HTTP 200,
        otherwise ``None`` once all attempts have failed.
    """
    max_attempts = 3
    for attempt in range(max_attempts):
        proxy = get_proxy()
        try:
            response = requests.get(
                url,
                proxies={"http": proxy, "https": proxy},
                headers=get_headers(),
                timeout=10,
            )
        except requests.RequestException:
            # Request-level failure (connection, proxy, timeout, bad URL):
            # treat it as a failed attempt and retry. Anything else, such as
            # KeyboardInterrupt or a programming error, is allowed to
            # propagate instead of being silently swallowed.
            pass
        else:
            if response.status_code == 200:
                return response.text
        # Back off before the next attempt; there is nothing to wait for
        # after the final one.
        if attempt < max_attempts - 1:
            time.sleep(random.uniform(1, 3))
    return None
import random
import time

import requests

# Proxy URLs that fetch() picks from at random on every attempt.
# NOTE(review): these entries look like placeholders (user:pass@ipN:port);
# replace them with real proxy URLs before use.
proxy_list = [
    "http://user:pass@ip1:port",
    "http://user:pass@ip2:port",
    "http://user:pass@ip3:port",
]

# User-Agent strings that get_headers() picks from at random.
user_agents = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)",
    "Mozilla/5.0 (X11; Linux x86_64)",
]


def get_proxy():
    """Return one proxy URL chosen at random from ``proxy_list``."""
    return random.choice(proxy_list)


def get_headers():
    """Return request headers with a randomly chosen ``User-Agent``."""
    return {"User-Agent": random.choice(user_agents)}


def fetch(url):
    """Fetch *url* through a random proxy, trying up to three times.

    Each attempt uses a freshly chosen proxy and User-Agent and a 10 second
    timeout. Between failed attempts the function sleeps for a random
    1-3 seconds.

    Args:
        url: The URL to request.

    Returns:
        The response body as text if an attempt returns HTTP 200,
        otherwise ``None`` once all attempts have failed.
    """
    max_attempts = 3
    for attempt in range(max_attempts):
        proxy = get_proxy()
        try:
            response = requests.get(
                url,
                proxies={"http": proxy, "https": proxy},
                headers=get_headers(),
                timeout=10,
            )
        except requests.RequestException:
            # Request-level failure (connection, proxy, timeout, bad URL):
            # treat it as a failed attempt and retry. Anything else, such as
            # KeyboardInterrupt or a programming error, is allowed to
            # propagate instead of being silently swallowed.
            pass
        else:
            if response.status_code == 200:
                return response.text
        # Back off before the next attempt; there is nothing to wait for
        # after the final one.
        if attempt < max_attempts - 1:
            time.sleep(random.uniform(1, 3))
    return None
def get_headers():
    """Build request headers with a randomly selected ``User-Agent``.

    The ``Accept-Language`` and ``Accept`` values are fixed; only the
    ``User-Agent`` varies, drawn from the module-level ``user_agents`` list.
    """
    chosen_agent = random.choice(user_agents)
    return {
        "User-Agent": chosen_agent,
        "Accept-Language": "en-US,en;q=0.9",
        "Accept": "text/html,application/xhtml+xml",
    }
def get_headers():
    """Build request headers with a randomly selected ``User-Agent``.

    The ``Accept-Language`` and ``Accept`` values are fixed; only the
    ``User-Agent`` varies, drawn from the module-level ``user_agents`` list.
    """
    chosen_agent = random.choice(user_agents)
    return {
        "User-Agent": chosen_agent,
        "Accept-Language": "en-US,en;q=0.9",
        "Accept": "text/html,application/xhtml+xml",
    }
def get_headers():
    """Build request headers with a randomly selected ``User-Agent``.

    The ``Accept-Language`` and ``Accept`` values are fixed; only the
    ``User-Agent`` varies, drawn from the module-level ``user_agents`` list.
    """
    chosen_agent = random.choice(user_agents)
    return {
        "User-Agent": chosen_agent,
        "Accept-Language": "en-US,en;q=0.9",
        "Accept": "text/html,application/xhtml+xml",
    }
# Randomized delay: each call blocks for a random duration drawn uniformly
# from the range 1 to 3 seconds.
time.sleep(random.uniform(1, 3))
time.sleep(random.uniform(1, 3))
time.sleep(random.uniform(1, 3))
# Return None when the response status is 403 or 429.
# NOTE(review): this is a fragment; the `return` needs an enclosing function
# that is not shown here. Presumably these codes are treated as "blocked or
# rate-limited" - confirm against the caller.
if response.status_code in [403, 429]: return None
if response.status_code in [403, 429]: return None
if response.status_code in [403, 429]: return None

- Random request failures
- Rate limits
- Inconsistent responses

- Sending too many requests too quickly
- Using the same IP repeatedly
- Missing or unrealistic headers
- No retry handling

- Rotates proxies
- Rotates headers
- Retries failed requests
- Adds delays

- Accept-Language

- HTTP 403 / 429 status codes
- CAPTCHA pages
- Empty or unexpected responses

- Larger proxy pools
- Queue systems (e.g., task queues)
- Parallel workers
- Logging and monitoring

- Harder to block