IMPORTANT INFORMATION

A simple trick lets you find something interesting:
1. Open https://cse.google.com/cse?cx=167f964aecdad17c5
2. Type any query (it all depends on your imagination, but I don't recommend the popular checker/base ones)
3. Look for a sane-looking cloud link
4. And you get this (nudes)

Downsides: the results are heavily cluttered with spam threads and it takes some effort to find anything. But you can knock together a lightweight parser (ChatGPT will do) that scrapes the results automatically and keeps only the useful links, without the spam (the lolz editor died on me, hence the shape the code was originally posted in).

Dependencies:
pip install playwright
pip install aiofiles   (listed for completeness; the script below never imports it)
playwright install chromium   (downloads the browser binaries Playwright needs)

Before running, solve the captcha manually in the search; one way to do that with Playwright is sketched below.
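A minimal sketch of the captcha step, not part of the original script: open the CSE in a visible browser with a persistent profile, click through the captcha once, and the cookies stay on disk. The profile directory name "cse_profile" is just an example.

import asyncio

from playwright.async_api import async_playwright

SEARCH_ENGINE_URL = "https://cse.google.com/cse?cx=167f964aecdad17c5"
PROFILE_DIR = "cse_profile"  # hypothetical local directory, pick any path you like

async def solve_captcha_once():
    async with async_playwright() as p:
        # A persistent profile keeps cookies between runs; headless=False gives
        # you a real window to click through the captcha in.
        context = await p.chromium.launch_persistent_context(
            user_data_dir=PROFILE_DIR,
            headless=False,
        )
        page = await context.new_page()
        await page.goto(SEARCH_ENGINE_URL)
        input("Solve the captcha in the browser window, then press Enter here... ")
        await context.close()

if __name__ == "__main__":
    asyncio.run(solve_captcha_once())

Note that the main script below launches its own headless browser with a fresh context, so for the solved captcha to actually carry over you would also have to point it at the same profile (launch_persistent_context(PROFILE_DIR, ...) instead of launch() plus new_context()). Otherwise simply flip headless=True to False in the script and solve the captcha there.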
Code

import asyncio
import re

from playwright.async_api import async_playwright

SEARCH_ENGINE_URL = "https://cse.google.com/cse?cx=167f964aecdad17c5"
RESULT_FILE = "results.txt"
JUNK_LOG_FILE = "junk_links.txt"


def is_junk_link(link):
    # Cheap filter on the URL alone, before the page is even opened.
    if re.search(r'/edit(\?|$)', link):
        return "URL contains '/edit'"
    return None


async def is_access_denied(page):
    # Heuristic: the page is useless if it asks for permission or a sign-in.
    try:
        title = (await page.title()).lower()
        content = (await page.inner_text("body")).lower()
        deny_phrases = [
            # The Russian phrases are kept as-is on purpose: Google serves
            # localized "access denied" pages, and these are matched literally.
            "нет доступа",
            "request access",
            "запросите доступ",
            "you need permission",
            "you must be signed in",
            "sign in",
            "телефон или адрес эл. почты",
        ]
        return any(phrase in title or phrase in content for phrase in deny_phrases)
    except Exception:
        return False


async def run():
    query = input("Enter a search query: ").strip()

    # Links saved on previous runs are skipped.
    processed = set()
    try:
        with open(RESULT_FILE, "r", encoding="utf-8") as f:
            for line in f:
                processed.add(line.strip())
    except FileNotFoundError:
        pass

    with open(RESULT_FILE, "a", encoding="utf-8") as results_file, \
         open(JUNK_LOG_FILE, "a", encoding="utf-8") as junk_file:
        async with async_playwright() as p:
            # Set headless=False if the CSE shows a captcha you need to solve by hand.
            browser = await p.chromium.launch(headless=True)
            context = await browser.new_context()
            page = await context.new_page()

            await page.goto(SEARCH_ENGINE_URL)
            await page.wait_for_selector("input.gsc-input")
            await page.fill("input.gsc-input", query)
            await page.keyboard.press("Enter")
            await page.wait_for_selector(".gsc-webResult")

            current_page = 1
            while True:
                print(f"[Page {current_page}]")
                await page.wait_for_timeout(1500)

                results = await page.query_selector_all(".gsc-webResult a")
                for r in results:
                    url = await r.get_attribute("href")
                    if not url:
                        continue
                    url = url.strip()
                    if url in processed:
                        continue

                    reason = is_junk_link(url)
                    if reason:
                        print(f"✘ Junk (by URL): {url} — {reason}")
                        junk_file.write(f"{url} — {reason}\n")
                        junk_file.flush()
                        continue

                    try:
                        # Open each candidate in its own tab and check the content.
                        new_page = await context.new_page()
                        await new_page.goto(url, timeout=15000)
                        await new_page.wait_for_timeout(2000)
                        if await is_access_denied(new_page):
                            reason = "Access denied"
                            print(f"✘ Junk (by content): {url} — {reason}")
                            junk_file.write(f"{url} — {reason}\n")
                            junk_file.flush()
                        else:
                            print(f"Saving: {url}")
                            results_file.write(url + "\n")
                            results_file.flush()
                        processed.add(url)
                        await new_page.close()
                    except Exception as e:
                        print(f"Error opening: {url} — {e}")
                        junk_file.write(f"{url} — failed to load ({e})\n")
                        junk_file.flush()

                # CSE pagination: click the cursor for the next page if it exists.
                next_button = await page.query_selector(
                    f'.gsc-cursor-page[aria-label="Page {current_page + 1}"]'
                )
                if next_button:
                    await next_button.click()
                    await page.wait_for_timeout(2000)
                    current_page += 1
                else:
                    break

            await browser.close()


if __name__ == "__main__":
    asyncio.run(run())
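Since the main complaint is spam, the URL filter is the easiest place to tighten. Below is a sketch of a stricter is_junk_link; the extra patterns and the allow-list of cloud hosts are just example assumptions, not something the original script ships with.

import re

# Example allow/deny lists; adjust to taste. These are assumptions for
# illustration, not part of the original post.
ALLOWED_HOSTS = ("drive.google.com", "docs.google.com", "disk.yandex", "dropbox.com")
JUNK_PATTERNS = (
    (r'/edit(\?|$)', "URL contains '/edit'"),       # original rule
    (r'accounts\.google\.com', "login page"),       # sign-in redirects
    (r'\bviewform\b', "Google Forms page"),         # forms instead of files
)

def is_junk_link(link):
    for pattern, reason in JUNK_PATTERNS:
        if re.search(pattern, link):
            return reason
    # Optionally require the link to point at a known cloud host at all.
    if not any(host in link for host in ALLOWED_HOSTS):
        return "not a known cloud host"
    return None

It is a drop-in replacement for the is_junk_link in the script above; anything that fails the checks still lands in junk_links.txt exactly as before.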