"""Scrape 3D-model item URLs from 3ddd.ru listing pages."""

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from bs4 import BeautifulSoup

BASE_URL = "https://3ddd.ru/3dmodels/"
CHROME_DRIVER_PATH = (
    'C:\\Program Files\\Google\\Chrome\\Application'
    '\\chromedriver-win64\\chromedriver.exe'
)


def get_item_urls(page_number=1):
    """Return the model-detail URLs found on one listing page.

    Args:
        page_number: 1-based index of the listing page to fetch.

    Returns:
        List of absolute URLs (order preserved, duplicates removed) whose
        href contains '/3dmodels/show/'.
    """
    options = webdriver.ChromeOptions()
    # options.add_argument('--headless')      # left visible to inspect the loaded page
    # options.add_argument('--disable-gpu')   # useful when running headless
    # NOTE: the original set os.environ["webdriver.chrome.driver"], which is a
    # Java system property and has no effect in Python Selenium — the driver
    # path must be passed through a Service object instead.
    driver = webdriver.Chrome(service=Service(CHROME_DRIVER_PATH), options=options)
    try:
        driver.get(f"{BASE_URL}?page={page_number}")
        # Give dynamically loaded content a chance to appear before scraping.
        driver.implicitly_wait(10)

        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # Collect every anchor pointing at a model detail page, keeping the
        # first occurrence of each URL (cards can link the same model twice).
        seen = set()
        urls = []
        for link in soup.find_all('a', href=True):
            href = link['href']
            if '/3dmodels/show/' in href:
                full_url = 'https://3ddd.ru' + href
                if full_url not in seen:
                    seen.add(full_url)
                    urls.append(full_url)
    finally:
        # Always release the browser, even if the fetch or parse failed.
        driver.quit()
    return urls


if __name__ == "__main__":
    # Example: list the items on the first page.
    for url in get_item_urls():
        print(url)