Hi everyone. These parsers had been gathering dust on my disk for a while. Libraries used: Requests, Selenium, BeautifulSoup4. All the scripts output in ip:port format, so splitting by protocol is on you. P.S. The GitHub repo the last script pulls from is maintained by someone who scrapes the proxies from other sources himself, so there will be duplicates; I recommend stripping them somehow (a small dedup sketch follows the first script).

Advanced
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup


class Address:
    """One row of the advanced.name proxy table."""
    def __init__(self, info):
        self.number = info[0]
        self.ip = info[1]
        self.port = info[2]
        self.type = info[3]
        self.country = info[4]
        self.speed = info[5]
        self.lastupdate = info[6]


class Parser:
    def __init__(self):
        self.addresses = []
        self.driver = self.init_driver()

    def init_driver(self):
        # Headless Chrome so no browser window pops up
        chrome_options = Options()
        chrome_options.add_argument("--headless")
        chrome_options.add_argument("--disable-gpu")
        chrome_options.add_argument("--no-sandbox")
        driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)
        return driver

    def getProxyList(self, url):
        self.driver.get(url)
        soup = BeautifulSoup(self.driver.page_source, 'lxml')
        cur_page = []
        for tr in soup.find_all('tr'):
            cur_addr = []
            for td in tr.find_all('td'):
                cur_addr.append(td.text)
            if cur_addr:
                cur_page.append(cur_addr)
        for cur_addr in cur_page:
            # Proxy rows have exactly 7 cells; anything else is table chrome
            if len(cur_addr) == 7:
                try:
                    address = Address(cur_addr)
                    self.addresses.append(address)
                except Exception as e:
                    print(f'Error creating Address: {e}')
                    print(f'Data: {cur_addr}')
            else:
                print(f'Unexpected row format: {cur_addr}')

    def getAllProxy(self):
        for i in range(1, 4):
            print(f'Proxies added: {(i - 1) * 100}')
            url = f'https://advanced.name/ru/freeproxy?page={i}'
            self.getProxyList(url)
        self.driver.quit()

    def writeToFile(self):
        s = ''
        for address in self.addresses:
            s += f'{address.ip}:{address.port}\n'
        with open('proxies.txt', 'w', encoding='utf-8') as f:
            f.write(s)


p = Parser()
p.getAllProxy()
p.writeToFile()
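Since all the scripts append to the same proxies.txt and the sources overlap, a quick dedup pass is worth running afterwards. A minimal sketch, assuming the file already exists in plain ip:port format:

import os

def dedupe_file(filename='proxies.txt'):
    # Drop repeated lines while keeping the original order
    # (dict.fromkeys preserves insertion order, unlike a plain set).
    with open(filename, encoding='utf-8') as f:
        lines = [line.strip() for line in f if line.strip()]
    unique = list(dict.fromkeys(lines))
    with open(filename, 'w', encoding='utf-8') as f:
        f.write('\n'.join(unique) + '\n')
    print(f'{len(lines) - len(unique)} duplicates removed, {len(unique)} left')

if __name__ == '__main__':
    if os.path.exists('proxies.txt'):
        dedupe_file()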
Geonode
import requests

# Pull the 500 most recently checked proxies from the Geonode API
url = "https://proxylist.geonode.com/api/proxy-list?limit=500&page=1&sort_by=lastChecked&sort_type=desc"
response = requests.get(url)
data = response.json()['data']

with open('proxies.txt', 'a') as file:
    for proxy in data:
        ip = proxy['ip']
        port = proxy['port']
        file.write(f"{ip}:{port}\n")

print("Data successfully written to proxies.txt")

ProxyWorld
import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent


class Address:
    """One row of the freeproxy.world proxy table."""
    def __init__(self, info):
        self.ip = info[0]
        self.port = info[1]
        self.country = info[2]
        self.city = info[3]
        self.speed = info[4]
        self.type = info[5]
        self.anonymity = info[6]
        self.lastcheck = info[7]


class Parser:
    def __init__(self):
        self.ua = UserAgent()
        self.headers = {'User-Agent': self.ua.random}
        self.addresses = []

    def getProxyList(self, url):
        try:
            response = requests.get(url, headers=self.headers)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'lxml')
            rows = soup.find_all('tr')[1:]  # skip the header row
            for row in rows:
                columns = row.find_all('td')
                if len(columns) == 8:  # proxy rows have exactly 8 cells
                    cur_addr = [col.text.strip() for col in columns]
                    address = Address(cur_addr)
                    self.addresses.append(address)
        except requests.RequestException as e:
            print(f"Error fetching data: {e}")

    def getAllProxy(self):
        page = 1
        while True:
            print(f'Proxies added: {len(self.addresses)}')
            url = f'https://www.freeproxy.world/?type=socks5&anonymity=&country=&speed=&port=&page={page}'
            self.getProxyList(url)
            if len(self.addresses) >= 170:
                break
            page += 1

    def writeToFile(self):
        with open('proxies.txt', 'a', encoding='utf-8') as f:
            for address in self.addresses:
                f.write(f'{address.ip}:{address.port}\n')


if __name__ == '__main__':
    p = Parser()
    p.getAllProxy()
    p.writeToFile()
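The freeproxy.world script above hardcodes type=socks5 in the query string. If you need the output split by protocol, you can loop over that parameter instead. A sketch under the assumption that the site also accepts type=http and type=socks4 (only socks5 is confirmed by the script above), reusing the same 8-column table layout:

import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent

def fetch_type(proxy_type, pages=2):
    # Scrape a couple of pages for one proxy type and return ip:port strings.
    headers = {'User-Agent': UserAgent().random}
    found = []
    for page in range(1, pages + 1):
        url = (f'https://www.freeproxy.world/?type={proxy_type}'
               f'&anonymity=&country=&speed=&port=&page={page}')
        try:
            response = requests.get(url, headers=headers, timeout=30)
            response.raise_for_status()
        except requests.RequestException as e:
            print(f'Error fetching {url}: {e}')
            continue
        soup = BeautifulSoup(response.text, 'lxml')
        for row in soup.find_all('tr')[1:]:
            cols = [td.text.strip() for td in row.find_all('td')]
            if len(cols) == 8:  # same 8-column rows as in the script above
                found.append(f'{cols[0]}:{cols[1]}')
    return found

if __name__ == '__main__':
    for proxy_type in ('http', 'socks4', 'socks5'):
        proxies = fetch_type(proxy_type)
        with open(f'proxies_{proxy_type}.txt', 'a', encoding='utf-8') as f:
            f.write('\n'.join(proxies) + '\n')
        print(f'{proxy_type}: {len(proxies)}')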
Github
import requests


def fetch_proxies(urls):
    # Download each raw list and collect all lines into one flat list
    proxies = []
    for url in urls:
        try:
            response = requests.get(url)
            response.raise_for_status()
            proxies.extend(response.text.strip().split('\n'))
        except requests.RequestException as e:
            print(f"Error fetching data from {url}: {e}")
    return proxies


def save_proxies_to_file(proxies, filename='proxies.txt'):
    with open(filename, 'a') as file:
        for proxy in proxies:
            file.write(f"{proxy}\n")


def main():
    urls = [
        'https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks4.txt',
        'https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks5.txt',
        'https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/http.txt'
    ]
    proxies = fetch_proxies(urls)
    save_proxies_to_file(proxies)
    print(f"Added {len(proxies)} proxies to proxies.txt")


if __name__ == "__main__":
    main()
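The TheSpeedX lists are already split by protocol in their file names, so instead of merging everything into one proxies.txt you can keep one file per protocol. A sketch along the lines of the script above (the proxies_<type>.txt output names are just a suggestion):

import requests

# The protocol is taken from the source file name, so no guessing is needed.
urls = {
    'socks4': 'https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks4.txt',
    'socks5': 'https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks5.txt',
    'http': 'https://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/http.txt',
}

for proto, url in urls.items():
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching {url}: {e}")
        continue
    proxies = response.text.strip().split('\n')
    with open(f'proxies_{proto}.txt', 'a') as f:
        f.write('\n'.join(proxies) + '\n')
    print(f'{proto}: {len(proxies)} proxies saved')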
Recently I decided to revisit the classics and give these parsers another go. Found about three on my PC: two were dead, but one actually started parsing (better if it had died too). In 15 minutes it scraped 120k proxies!!! I got excited, figured I'd run a giveaway on the forum and treat the newbies. Put the list through a checker and went off to do other things. Came back an hour later to a touching sight: out of 120k scraped proxies, a whole 4 turned out valid. Not thousands, just 4 proxies. In the end I deleted those useless parsers from my PC for good and forgot about them like a bad dream )))
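A check like the one described is cheap to script yourself. A minimal sequential sketch, assuming a SOCKS5 list and httpbin.org as the test URL (both are assumptions; requests needs the optional SOCKS extra, pip install requests[socks], and for 120k entries you would want threads):

import requests

def check_proxy(proxy, timeout=5):
    # Try one ip:port through a test URL; any exception means the proxy is dead.
    proxies = {'http': f'socks5://{proxy}', 'https': f'socks5://{proxy}'}
    try:
        r = requests.get('https://httpbin.org/ip', proxies=proxies, timeout=timeout)
        return r.ok
    except requests.RequestException:
        return False

if __name__ == '__main__':
    with open('proxies.txt', encoding='utf-8') as f:
        candidates = [line.strip() for line in f if line.strip()]
    alive = [p for p in candidates if check_proxy(p)]
    with open('alive.txt', 'w', encoding='utf-8') as f:
        f.write('\n'.join(alive) + '\n')
    print(f'{len(alive)} of {len(candidates)} alive')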
desmodian, I generally agree, but here you can at least collect a dozen or so, and the main thing is they're free. I even have a theory that these sites simply generate them, or scrape each other (and between the sources posted in this thread there even seemed to be overlaps).