Dynamic block parsing + scrolling

Joined
May 30, 2024
Messages
2
Reaction score
0
I created a test email account. Could you tell me how to extract the dynamically loaded values from the file list block and how to scroll it properly? This is what I have so far, but for some reason no new values are added when scrolling.
The script itself clicks on all the 'download' icons one by one and catches requests where the direct download URL is located.



Python:
import csv
import time
import json
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
 
import os

# Chrome start-up flags for the automated browser session.
chrome_options = Options()
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
# Ask ChromeDriver to record the "performance" log; the script later reads
# Network.requestWillBeSent events from it to find the download URLs.
# The preference is set on the Options object rather than passed through the
# `desired_capabilities` keyword, which newer Selenium releases no longer
# accept, so no try/except TypeError fallback is required.
chrome_options.set_capability('goog:loggingPrefs', {'performance': 'ALL'})

chrome_driver_path = 'C:\\webdrivers\\chromedriver.exe'

# Fail early with a clear message instead of an opaque driver start-up error.
if not os.path.exists(chrome_driver_path):
    raise FileNotFoundError(f"ChromeDriver not found at {chrome_driver_path}")

service = Service(chrome_driver_path)
driver = webdriver.Chrome(service=service, options=chrome_options)
 
login_url = 'https://account.mail.ru/login?fail=1'

# Credentials can be supplied through the environment so that they do not
# have to live in the source file. The literals are kept only as fallbacks
# for backward compatibility.
# NOTE(review): the e-mail literal looks like an obfuscation placeholder, not
# a real address - set MAILRU_USERNAME before running. Avoid committing a real
# password as the fallback.
mail_username = os.environ.get('MAILRU_USERNAME', '[email protected]')
mail_password = os.environ.get('MAILRU_PASSWORD', 'Qzwxas12!')

driver.get(login_url)

# Shared explicit wait (30 s timeout), also used by the steps after login.
wait = WebDriverWait(driver, 30)


def _click_when_ready(css_selector):
    """Wait until the element matching css_selector is clickable, click it and return it."""
    element = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, css_selector)))
    element.click()
    return element


# Step 1: user name, then the "next" button.
email_field = _click_when_ready('input[name="username"]')
email_field.clear()
email_field.send_keys(mail_username)
next_button = _click_when_ready('button[data-test-id="next-button"]')

# Step 2: password, then the submit button.
password_field = _click_when_ready('input[name="password"]')
password_field.clear()
password_field.send_keys(mail_password)
login_button = _click_when_ready('button[data-test-id="submit-button"]')
 
# Fixed pause after submitting the login form before navigating on.
time.sleep(5)

# Open the album page and pause again before touching the toolbar.
cloud_url = 'https://cloud.mail.ru/albums/ada729f1-181d-495f-86d5-86f2b5e44fa8'
driver.get(cloud_url)
time.sleep(5)

# Click the toolbar control first, then the "Списком" entry, each as soon as
# it becomes clickable.
for view_locator in (
    (By.XPATH, '//*[@id="TOP_TOOLBAR_ID"]/div[2]/div[1]'),
    (By.XPATH, '//div[contains(text(), "Списком")]'),
):
    wait.until(EC.element_to_be_clickable(view_locator)).click()
 
# Scroll until the page stops producing new rows.
#
# Scrolling only the window and stopping the first time
# document.body.scrollHeight is unchanged ends the loop immediately when the
# list scrolls inside its own container, or when new rows need more than one
# pause to load.
# NOTE(review): presumably the album list is such an inner scroll container -
# confirm in DevTools.
# Bringing the last rendered download icon into view scrolls whichever
# ancestor actually owns the scrollbar. The loop stops only after several
# consecutive rounds in which neither the page height nor the rendered rows
# changed.
icon_selector = 'div[id="download-icon"]'
max_idle_rounds = 3

idle_rounds = 0
last_height = driver.execute_script("return document.body.scrollHeight")
last_rendered = None
while idle_rounds < max_idle_rounds:
    rendered_icons = driver.find_elements(By.CSS_SELECTOR, icon_selector)
    if rendered_icons:
        driver.execute_script("arguments[0].scrollIntoView();", rendered_icons[-1])
    # Keep the original window scroll for pages where the body does scroll.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)

    new_height = driver.execute_script("return document.body.scrollHeight")
    rendered_icons = driver.find_elements(By.CSS_SELECTOR, icon_selector)
    # Row count plus the identity of the last row: changes when rows are
    # appended, and also when a fixed-size window of rows is re-rendered.
    rendered = (len(rendered_icons), rendered_icons[-1].id if rendered_icons else None)

    if new_height == last_height and rendered == last_rendered:
        idle_rounds += 1
    else:
        idle_rounds = 0
    last_height, last_rendered = new_height, rendered
 
# Accumulator for the direct download URLs; intercept_requests() appends to it.
download_urls = list()

# Every element matching the download-icon selector that is in the DOM now.
download_icons = driver.find_elements(by=By.CSS_SELECTOR, value='div[id="download-icon"]')

# Send the CDP "Network.enable" command before the icons are clicked.
driver.execute_cdp_cmd('Network.enable', dict())
 
def intercept_requests(driver):
    """Collect attachment download URLs from the driver's performance log.

    Reads the entries returned by ``driver.get_log('performance')``, keeps the
    ``Network.requestWillBeSent`` events whose request URL contains
    ``datacloudmail.ru/attach/`` and appends each such URL to the module-level
    ``download_urls`` list. A URL that is already in the list is skipped, so a
    request issued more than once is recorded once.

    Args:
        driver: object exposing ``get_log(name)`` that yields dicts with a
            JSON string under the ``'message'`` key.

    Returns:
        The list of URLs newly added to ``download_urls`` by this call.
    """
    captured = []
    for entry in driver.get_log('performance'):
        message = json.loads(entry['message'])['message']
        if message.get('method') != 'Network.requestWillBeSent':
            continue
        # Defensive lookups: skip events that lack a request URL instead of
        # raising KeyError in the middle of a collection run.
        url = message.get('params', {}).get('request', {}).get('url', '')
        if 'datacloudmail.ru/attach/' not in url:
            continue
        if url in download_urls:
            continue
        download_urls.append(url)
        captured.append(url)
    return captured
 
# Click each download icon in turn, then harvest the requests it triggered.
for download_icon in download_icons:
    pointer = ActionChains(driver)
    pointer.move_to_element(download_icon)
    pointer.click(download_icon)
    pointer.perform()
    # Fixed pause before the performance log is read.
    time.sleep(2)
    intercept_requests(driver)
 
from urllib.parse import unquote, urlsplit

# Write one row per captured URL, then close the browser.
# try/finally makes sure the browser is shut down even if writing fails.
try:
    # Explicit UTF-8: decoded file names may contain non-ASCII characters,
    # which the platform default encoding may not be able to represent.
    with open('download_urls.csv', mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['File Name', 'Download URL'])
        for url in download_urls:
            # Take the last segment of the URL *path* only, so the query
            # string does not end up in the name, and undo percent-escaping.
            file_name = unquote(urlsplit(url).path.rsplit('/', 1)[-1])
            writer.writerow([file_name, url])
finally:
    driver.quit()
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only takes a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

No members online now.

Forum statistics

Threads
473,882
Messages
2,569,948
Members
46,267
Latest member
TECHSCORE

Latest Threads

Top