esi4

Это содержимое защищено паролем. Для его просмотра введите, пожалуйста, пароль:

Прокрутить вверх

import time
import openpyxl
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager


def start_browser():
    """Start a Chrome WebDriver with common automation fingerprints reduced.

    Notifications are disabled, the window starts maximized, the
    ``enable-automation`` switch is excluded, and a script that makes
    ``navigator.webdriver`` read as ``undefined`` is registered for every
    new document.

    Returns:
        The configured ``webdriver.Chrome`` instance.
    """
    options = webdriver.ChromeOptions()
    # Chrome switches must start with two ASCII hyphens.
    options.add_argument("--disable-notifications")
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("--start-maximized")
    options.add_experimental_option("excludeSwitches", ["enable-automation"])

    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=options)

    try:
        driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """
                Object.defineProperty(navigator, 'webdriver', {
                    get: () => undefined
                })
            """
        })
    except Exception:
        # Do not leave an orphaned browser process behind if setup fails.
        driver.quit()
        raise
    return driver


def search_yandex_image(driver, query):
    """Search Yandex Images for *query* and return the URL of a full-size image.

    The query is typed into the search box (re-typed up to three times if the
    box content does not match), the search is submitted, and the first two
    previews are opened in turn until one yields an image ``src`` containing
    ``http``.

    Args:
        driver: Selenium WebDriver used for the search.
        query: Search text.

    Returns:
        The image URL, or ``None`` when nothing usable was found or any step
        failed (the error is printed).
    """
    try:
        driver.get("https://yandex.ru/images/")

        search_box = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, "input.input__control.mini-suggest__input")
            )
        )

        search_box.clear()
        search_box.send_keys(query)

        # The input occasionally loses characters; verify and re-type if needed.
        for _ in range(3):
            if search_box.get_attribute('value') == query:
                break
            time.sleep(0.1)
            search_box.clear()
            search_box.send_keys(query)

        search_box.send_keys(Keys.RETURN)

        WebDriverWait(driver, 2).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "div.SerpList"))
        )
        time.sleep(1)

        previews = WebDriverWait(driver, 2).until(
            EC.presence_of_all_elements_located(
                (By.CSS_SELECTOR, "img.ImagesContentImage-Image_clickable")
            )
        )[:2]

        for preview in previews:
            try:
                preview.click()
                time.sleep(1)

                big_image = WebDriverWait(driver, 5).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, "img.MMImage-Origin"))
                )
                image_url = big_image.get_attribute('src')

                if image_url and 'http' in image_url:
                    return image_url

            except Exception as e:
                print(f"Ошибка при обработке превью: {str(e)}")
                continue

        return None

    except Exception as e:
        print(f"Общая ошибка поиска: {str(e)}")
        return None


def update_excel(file_path):
    """Fill in missing photo URLs in an Excel parts list via Yandex image search.

    Rows from the second one down are read as: column A car, B category,
    C part name, D part number, E photo. Rows whose photo cell is empty or
    ``"НетФото"`` and which have both a part name and a part number are
    searched; the found URL goes to column E with a disclaimer in column F,
    otherwise column E is set to ``"НетФото"``. The workbook is saved every
    five processed rows and once more at the end.

    Args:
        file_path: Path of the ``.xlsx`` workbook, modified in place.
    """
    wb = openpyxl.load_workbook(file_path)
    sheet = wb.active

    if sheet.cell(row=1, column=6).value != "доп.":
        sheet.cell(row=1, column=6, value="доп.")

    driver = None
    # Bound before the try block so the summary in ``finally`` cannot hit an
    # unbound name when the browser fails to start.
    processed = 0
    start_time = time.time()
    try:
        driver = start_browser()

        time.sleep(3)

        for row in sheet.iter_rows(min_row=2, values_only=False):

            if row[4].value not in [None, "НетФото"]:
                continue

            car = row[0].value or ""
            categ = row[1].value or ""
            part_name = row[2].value or ""
            part_number = row[3].value or ""

            if not part_name or not part_number:
                continue

            search_query = f' "{part_name}" {categ} {part_number} для "{car}"'
            print(f"Обработка: {search_query}")

            try:
                image_url = search_yandex_image(driver, search_query)

                if image_url and image_url.startswith('http'):
                    row[4].value = image_url
                    row[5].value = "Фото может не соответствовать"
                    print(f"Успешно: {image_url[:50]}…")
                else:
                    row[4].value = "НетФото"
                    print("Изображение не найдено")

                processed += 1
                # Periodic save so an interrupted run keeps most of its work.
                if processed % 5 == 0:
                    wb.save(file_path)
                    print(f"Прогресс: {processed} строк")

            except Exception as e:
                print(f"Ошибка: {str(e)}")
                continue

        wb.save(file_path)
        print(f"Всего обработано: {processed} строк")

    except Exception as e:
        print(f"Критическая ошибка: {str(e)}")
    finally:
        if driver:
            driver.quit()
        wb.close()
        print(f"Всего обработано {processed} строк. Время выполнения: {time.time() - start_time:.1f} сек")


# Script entry point: process the workbook that lacks part photos.
if __name__ == "__main__":
    update_excel("changanNoPics.xlsx")

import os
import time
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import pandas as pd
from webdriver_manager.chrome import ChromeDriverManager
from tqdm import tqdm

# Site root, used to resolve relative links found on catalog pages.
BASE_URL = 'https://relines.ru'
# Car models to scrape; each entry lists one or more catalog start URLs.
MODELS = [
    {'name': 'Geely Atlas Pro I (2019)', 'urls': ['https://relines.ru/catalog/geely/atlas-pro/nl-3b']},
    {'name': 'Geely Coolray I (2018)', 'urls': ['https://relines.ru/catalog/geely/coolray/sx11']},
    {'name': 'Geely Coolray I рест (2022)', 'urls': ['https://relines.ru/catalog/geely/coolray/sx11la']},
    {'name': 'Geely Emgrand 7 I (2009)', 'urls': ['https://relines.ru/catalog/geely/emgrand-7/fe-1-2']},
    {'name': 'Geely Emgrand 7 IV (2021)', 'urls': ['https://relines.ru/catalog/geely/emgrand-7/ss11']},
    {'name': 'Geely Emgrand 7 II рест (2016)', 'urls': ['https://relines.ru/catalog/geely/emgrand-7/fe-3jc']},
    {'name': 'Geely Emgrand 7 II (2014)', 'urls': ['https://relines.ru/catalog/geely/emgrand-7/fe-3-4']},
    {'name': 'Geely Monjaro I (2021)', 'urls': ['https://relines.ru/catalog/geely/monjaro/kx11']},
    {'name': 'Geely Tugella I рест (2022)', 'urls': ['https://relines.ru/catalog/geely/tugella/fy11-my2022']},
    {'name': 'Geely Tugella I (2019)', 'urls': ['https://relines.ru/catalog/geely/tugella/fy11-my2019']},
    {'name': 'Geely Vision I (2006)', 'urls': ['https://relines.ru/catalog/geely/vision/fc-1']},
    {'name': 'Omoda С5 I (2022)', 'urls': ['https://relines.ru/catalog/omoda/c5/t19c']},
    {'name': 'Chery Arrizo 8 I (2022)', 'urls': ['https://relines.ru/catalog/chery/arrizo-8/m1e']},
    {'name': 'Chery Tiggo 2 Pro I (2021)', 'urls': ['https://relines.ru/catalog/chery/tiggo-2-pro/a13tfl']},
    {'name': 'Chery Tiggo 3 — I (2014)', 'urls': ['https://relines.ru/catalog/chery/tiggo-3/t11fl3']},
    {'name': 'Chery Tiggo 4 I рест (2018)', 'urls': ['https://relines.ru/catalog/chery/tiggo-4/t19']},
    {'name': 'Chery Tiggo 4 I (2017)', 'urls': ['https://relines.ru/catalog/chery/tiggo-4/t17']},
    {'name': 'Chery Tiggo 5 I (2013)', 'urls': ['https://relines.ru/catalog/chery/tiggo-5/t21']},
    {'name': 'Chery Tiggo 7 Pro I (2020)', 'urls': ['https://relines.ru/catalog/chery/tiggo-7-pro/t1e']},
    {'name': 'Chery Tiggo 8 Pro I (2020)', 'urls': ['https://relines.ru/catalog/chery/tiggo-8-pro/t1a']},
    {'name': 'Chery Tiggo 8 Pro Max I (2022)', 'urls': ['https://relines.ru/catalog/chery/tiggo-8-pro-max/t1d']},
    {'name': 'Haval F7 I (2018)', 'urls': ['https://relines.ru/catalog/haval/f7/my2018']},
    {'name': 'Haval F7x I (2019)', 'urls': ['https://relines.ru/catalog/haval/f7x/my2019']},
    {'name': 'Haval H2 I (2014)', 'urls': ['https://relines.ru/catalog/haval/h2/my2014']},
    {'name': 'Haval H6 Coupe I (2015)', 'urls': ['https://relines.ru/catalog/haval/h6-coupe/my2015']},
    {'name': 'Haval H9 I — рест I (2017)', 'urls': ['https://relines.ru/catalog/haval/h9/my2017']},
    {'name': 'Haval H9 I (2014)', 'urls': ['https://relines.ru/catalog/haval/h9/my2014']},
]

# Downloaded part photos are stored here; the folder is created on import.
IMG_FOLDER = 'PartsPics'
CSV_FILE = 'relines_parts.csv'
os.makedirs(IMG_FOLDER, exist_ok=True)


def init_driver():
    """Create a Chrome WebDriver with a fixed 1920x1080 window.

    Returns:
        A ``webdriver.Chrome`` instance whose driver binary is resolved by
        ``ChromeDriverManager``.
    """
    chrome_options = Options()
    # Chrome switches must start with two ASCII hyphens.
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--window-size=1920,1080")
    return webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=chrome_options
    )


def save_to_csv(data, filename):
    """Write *data* to *filename* as a semicolon-separated CSV, overwriting it.

    Args:
        data: Records accepted by ``pandas.DataFrame`` (here: a list of dicts).
        filename: Destination path. Written as UTF-8 with BOM (``utf-8-sig``),
            with a header row and without the index column.
    """
    df = pd.DataFrame(data)
    df.to_csv(filename, mode='w', header=True, index=False, sep=';', encoding='utf-8-sig')


def process_image(img_url, model_name, article):
    """Download a part image into ``IMG_FOLDER`` and return its local path.

    Args:
        img_url: Image URL taken from the page; any query string is dropped.
            Relative URLs are resolved against ``BASE_URL``.
        model_name: Car model name, used in the file name.
        article: Part number, used in the file name.

    Returns:
        The saved file path on success, ``'No image'`` when *img_url* is
        empty, ``'Download failed'`` on a non-200 response, or ``'Error'``
        when an exception occurred (the error is printed).
    """
    if not img_url:
        return 'No image'

    from urllib.parse import urljoin

    try:
        safe_model = model_name.replace('/', '_').replace(' ', '_')
        safe_article = article.replace('/', '_').strip() or 'no_article'
        img_name = f"{safe_model}_{safe_article}.jpg"
        img_path = os.path.join(IMG_FOLDER, img_name)

        clean_img_url = urljoin(BASE_URL, img_url.split('?')[0])
        # A timeout keeps one stalled download from hanging the whole run;
        # the context manager releases the streamed connection.
        with requests.get(clean_img_url, stream=True, timeout=30) as response:
            if response.status_code != 200:
                return 'Download failed'
            with open(img_path, 'wb') as f:
                for chunk in response.iter_content(1024):
                    f.write(chunk)
        return img_path
    except Exception as e:
        print(f"Ошибка загрузки изображения: {str(e)[:100]}")
        return 'Error'


def parse_part(item, model_name):
    """Turn one catalog grid item into a record and download its image.

    Args:
        item: BeautifulSoup element for a ``.catalog-product-grid-item``.
        model_name: Car model the item belongs to.

    Returns:
        A dict with keys ``model``, ``part_name``, ``article`` and ``image``
        (missing name/SKU become ``'N/A'``), or ``None`` when parsing raised
        (the error is printed).
    """
    try:
        name_elem = item.select_one('.catalog-product-grid-item__name')
        name = name_elem.text.strip() if name_elem else 'N/A'

        sku_elem = item.select_one('.sku-code')
        sku = sku_elem.text.strip() if sku_elem else 'N/A'

        img_elem = item.select_one('.catalog-product-grid-item__image')
        img_url = img_elem['src'] if img_elem and img_elem.has_attr('src') else ''

        img_path = process_image(img_url, model_name, sku)

        return {
            'model': model_name,
            'part_name': name,
            'article': sku,
            'image': img_path
        }
    except Exception as e:
        print(f"Ошибка обработки запчасти: {str(e)[:100]}")
        return None


def process_model_page(driver, model_url, model_name):
    """Collect all parts of one model by walking its paginated catalog.

    Args:
        driver: Selenium WebDriver used to load pages.
        model_url: URL of the first catalog page.
        model_name: Car model name, stored in every record.

    Returns:
        A list of part dicts as produced by ``parse_part``. Collection stops
        when a page shows no grid items within 15 seconds or has no enabled
        "next" link.
    """
    from urllib.parse import urljoin

    parts = []
    current_url = model_url

    with tqdm(desc=f"Обработка {model_name}", unit="страница", colour='blue') as pbar:
        while True:
            driver.get(current_url)
            try:
                WebDriverWait(driver, 15).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, ".catalog-product-grid-item"))
                )
            except Exception:
                # No items appeared (timeout or driver error): treat as the end.
                break

            soup = BeautifulSoup(driver.page_source, 'lxml')
            items = soup.select('.catalog-product-grid-item')

            for item in items:
                part_data = parse_part(item, model_name)
                if part_data:
                    parts.append(part_data)

            # Count the page before deciding whether to continue, so the last
            # page is included in the progress total.
            pbar.update(1)

            next_btn = soup.select_one('a.pagination__next:not(.pagination__next_disabled)')
            if not next_btn or not next_btn.get('href'):
                break

            # urljoin resolves root-relative, relative and absolute hrefs alike.
            current_url = urljoin(current_url, next_btn['href'])

    return parts


def main():
    """Scrape every model in ``MODELS`` and write one CSV file per model.

    The CSV file name is the model name with ``/``, ``*``, ``?`` and ``:``
    replaced by underscores. The browser is always closed at the end.
    """
    driver = init_driver()
    try:
        for model in MODELS:
            model_name = model['name']
            print(f"\nОбработка модели: {model_name}")

            safe_filename = f"{model_name.replace('/', '_').replace('*', '_').replace('?', '_').replace(':', '_')}.csv"

            all_parts = []
            for model_url in model['urls']:
                parts = process_model_page(driver, model_url, model_name)
                if parts:
                    all_parts.extend(parts)

            if all_parts:
                save_to_csv(all_parts, safe_filename)
                print(f"Создан файл {safe_filename} с {len(all_parts)} запчастями")

    finally:
        driver.quit()


# Script entry point for the relines.ru scraper.
if __name__ == '__main__':
    main()

import os
import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import pandas as pd
from webdriver_manager.chrome import ChromeDriverManager
from tqdm import tqdm


# Site root, prepended to relative links and image paths.
BASE_URL = 'https://tigerauto.ru'
# Car models to scrape, each with its catalog start page.
MODELS = [
    {'name': 'Chery E3 Bonus 3', 'url': 'https://tigerauto.ru/chery/chery-bonus-3-a19'},
    {'name': 'Geely Atlas', 'url': 'https://tigerauto.ru/geely/geely-atlas'},
    {'name': 'Geely Emgrand EC7', 'url': 'https://tigerauto.ru/geely/geely-emgrand-ec7'}
]

# Part photos go to IMG_FOLDER, schematics to its "Чертежи" subfolder;
# both folders are created on import.
IMG_FOLDER = 'chery_e3 geely_emgrand-ec7-gl atlas'
CSV_FILE = 'parts.csv'
SCHEMATIC_FOLDER = os.path.join(IMG_FOLDER, 'Чертежи')
os.makedirs(SCHEMATIC_FOLDER, exist_ok=True)


def init_driver():
    """Create a visible (non-headless) Chrome WebDriver with a 1920x1080 window.

    Returns:
        A ``webdriver.Chrome`` instance whose driver binary is resolved by
        ``ChromeDriverManager``.
    """
    chrome_options = Options()
    # Headless mode is left off here; add "--headless" to run without a window.
    # Chrome switches must start with two ASCII hyphens.
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--window-size=1920,1080")
    return webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=chrome_options
    )

def smooth_scroll(driver):
    """Scroll to the bottom of the page until its height stops growing.

    After each jump to the bottom the function waits 0.5 s and compares the
    document height with the previous one; once the height is stable it
    waits a further 3 s before returning.

    Args:
        driver: Selenium WebDriver showing the page to scroll.
    """
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(0.5)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height
    # Final pause once scrolling has settled.
    time.sleep(3)
def save_to_csv(data, filename):
    """Append *data* to a semicolon-separated CSV, then pause for 3 seconds.

    The header row is written only when *filename* does not exist yet.

    Args:
        data: Records accepted by ``pandas.DataFrame`` (here: a list of dicts).
        filename: Destination path, written as UTF-8 with BOM (``utf-8-sig``).
    """
    df = pd.DataFrame(data)
    header = not os.path.exists(filename)
    df.to_csv(filename, mode='a', header=header, index=False, sep=';', encoding='utf-8-sig')
    time.sleep(3)

def get_full_name(element):
    """Extract a display name for a category tile from several sources.

    Sources are tried in order: the ``title`` of the tile's ``<img>`` (text
    after the last ``'Каталог'``), the text of a ``div.vh`` block, and
    finally the last path segment of the tile's link, title-cased with
    hyphens turned into spaces.

    Args:
        element: BeautifulSoup element of the tile.

    Returns:
        The extracted name, or ``"Неизвестная категория"`` when every source
        failed (the error is printed).
    """
    # NOTE: a stray ``time.sleep(3)`` that followed the final return in the
    # source could never run per call and has been dropped.
    try:
        img = element.find('img')
        if img and img.has_attr('title'):
            return img['title'].split('Каталог')[-1].strip()

        text_block = element.find('div', class_='vh')
        if text_block:
            return text_block.get_text(strip=True)

        href = element.find('a')['href']
        # rstrip so a trailing slash does not produce an empty name.
        return href.rstrip('/').split('/')[-1].replace('-', ' ').title()

    except Exception as e:
        print(f"Ошибка извлечения названия: {str(e)[:100]}")
        return "Неизвестная категория"
def get_categories(driver, model_url):
    """Load a model page and return its category tiles.

    Args:
        driver: Selenium WebDriver used to load the page.
        model_url: URL of the model's catalog page.

    Returns:
        A list of ``{'name': ..., 'url': ...}`` dicts; empty when no tile
        appeared within 10 seconds.
    """
    # NOTE: a stray ``time.sleep(3)`` that followed the final return in the
    # source could never run per call and has been dropped.
    driver.get(model_url)
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "div.col-md-2.col-sm-3.col-xs-6"))
        )
    except Exception:
        return []

    soup = BeautifulSoup(driver.page_source, 'lxml')
    categories = soup.select('div.col-md-2.col-sm-3.col-xs-6')

    result = []
    for cat in categories:
        try:
            name = get_full_name(cat)
            url = BASE_URL + cat.find('a')['href']
            result.append({'name': name, 'url': url})
        except Exception as e:
            print(f"Ошибка обработки категории: {str(e)[:100]}")

    print(f"Найдено категорий: {len(result)}")
    return result
def get_subcategories(driver, category_url):
    """Load a category page and return its subcategory tiles.

    Args:
        driver: Selenium WebDriver used to load the page.
        category_url: URL of the category page.

    Returns:
        A list of ``{'name': ..., 'url': ...}`` dicts; empty when no tile
        appeared within 10 seconds.
    """
    # NOTE: a stray ``time.sleep(3)`` that followed the final return in the
    # source could never run per call and has been dropped.
    driver.get(category_url)
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "div.col-md-2.col-sm-4.col-xs-12.vcenter"))
        )
    except Exception:
        return []

    soup = BeautifulSoup(driver.page_source, 'lxml')
    subcats = soup.select('div.col-md-2.col-sm-4.col-xs-12.vcenter')

    result = []
    for sub in subcats:
        try:
            name = get_full_name(sub)
            url = BASE_URL + sub.find('a')['href']
            result.append({'name': name, 'url': url})
        except Exception as e:
            print(f"Ошибка обработки подкатегории: {str(e)[:100]}")

    print(f"Найдено подкатегорий: {len(result)}")
    return result


def get_parts(driver, subcategory_url, category_name, subcategory_name, model_name):
    """Scrape the parts table of one subcategory page.

    The page is scrolled to the bottom, the schematic image is saved once,
    and every row of ``#sp-table`` is converted with ``process_row``.

    Args:
        driver: Selenium WebDriver used to load the page.
        subcategory_url: URL of the subcategory page.
        category_name: Name of the parent category, stored in each record.
        subcategory_name: Name of the subcategory, stored in each record.
        model_name: Car model name, stored in each record.

    Returns:
        A list of part dicts; empty when the table did not load. Rows that
        raise are reported and skipped.
    """
    driver.get(subcategory_url)

    try:
        smooth_scroll(driver)
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "#sp-table tbody tr"))
        )
    except Exception as e:
        print(f"Ошибка загрузки страницы: {str(e)[:100]}")
        return []

    soup = BeautifulSoup(driver.page_source, 'lxml')
    rows = soup.select('#sp-table tbody tr')
    parts = []

    schematic_path = process_schematic(soup, model_name, subcategory_name)

    with tqdm(total=len(rows),
              desc=f"{subcategory_name[:30]:<30}",
              unit="part",
              colour='MAGENTA',
              bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]") as pbar:

        for i, row in enumerate(rows):
            try:
                part_data = process_row(row, model_name, category_name, subcategory_name, schematic_path, i)
                parts.append(part_data)

            except Exception as e:
                print(f"\nОшибка в строке {i + 1}: {str(e)[:100]}")
            finally:
                pbar.update(1)

    if len(parts) != len(rows):
        print(f"\nВнимание: Обработано {len(parts)} из {len(rows)} запчастей")

    return parts


def process_row(row, model_name, category_name, subcategory_name, schematic_path, index):
    """Build a record for one parts-table row, tagging the category with the model suffix.

    NOTE: a second ``process_row`` defined further down in this file rebinds
    the name, so this variant is not the one callers end up using unless
    that later definition is removed.

    Args:
        row: BeautifulSoup ``<tr>`` element.
        model_name: Car model name.
        category_name: Parent category name; the last word of *model_name*
            is appended to it in parentheses.
        subcategory_name: Subcategory name.
        schematic_path: Saved schematic path, recorded on the first row only.
        index: Zero-based row index within the table.

    Returns:
        A dict with keys ``model``, ``category``, ``subcategory``,
        ``schematic``, ``part_name``, ``article`` and ``image``.
    """
    # Missing cells fall back to 'N/A' instead of raising AttributeError.
    article_elem = row.select_one('td.views-field-field-cross-article-table a')
    article = article_elem.text.strip() if article_elem else 'N/A'

    name_elem = row.select_one('td.views-field-field-cross-small-name')
    name = name_elem.text.strip() if name_elem else 'N/A'

    img_tag = row.select_one('img.img-responsive')
    img_url = img_tag['src'] if img_tag else ''

    return {
        'model': model_name,
        'category': f"{category_name} ({model_name.split()[-1]})",
        'subcategory': subcategory_name,
        'schematic': schematic_path if index == 0 else '',
        'part_name': name,
        'article': article,
        'image': process_image(img_url, model_name, article)
    }
def process_schematic(soup, model_name, subcategory_name):
    """Download the schematic drawing of a subcategory page.

    Args:
        soup: Parsed page; the image is looked up at ``div.draw-i img``.
        model_name: Car model name, used in the file name.
        subcategory_name: Subcategory name; its first 50 characters are used
            in the file name.

    Returns:
        The saved file path inside ``SCHEMATIC_FOLDER``, or ``''`` when the
        page has no schematic or the download failed (the error is printed).
    """
    schematic_img = soup.select_one('div.draw-i img')
    if not schematic_img or 'src' not in schematic_img.attrs:
        return ''

    from urllib.parse import urljoin

    try:
        schematic_url = urljoin(BASE_URL, schematic_img['src'])
        response = requests.get(schematic_url, timeout=30)
        # Do not save an HTML error page under a .jpg name.
        response.raise_for_status()
        schematic_filename = f"{model_name} — {subcategory_name[:50]}.jpg".replace('/', '_')
        schematic_path = os.path.join(SCHEMATIC_FOLDER, schematic_filename)

        with open(schematic_path, 'wb') as handler:
            handler.write(response.content)
        return schematic_path
    except Exception as e:
        print(f"Ошибка загрузки чертежа: {str(e)[:100]}")
        return ''


def process_row(row, model_name, category_name, subcategory_name, schematic_path, index):
    """Build a record for one parts-table row and download its photo.

    Args:
        row: BeautifulSoup ``<tr>`` element.
        model_name: Car model name.
        category_name: Parent category name.
        subcategory_name: Subcategory name.
        schematic_path: Saved schematic path, recorded on the first row only.
        index: Zero-based row index within the table.

    Returns:
        A dict with keys ``model``, ``category``, ``subcategory``,
        ``schematic``, ``part_name``, ``article`` and ``image``. A missing
        article or name cell becomes ``'N/A'``.
    """
    article_elem = row.select_one('td.views-field-field-cross-article-table a')
    article = article_elem.text.strip() if article_elem else 'N/A'

    name_elem = row.select_one('td.views-field-field-cross-small-name')
    name = name_elem.text.strip() if name_elem else 'N/A'

    img_tag = row.select_one('img.img-responsive')
    img_url = img_tag['src'] if img_tag else ''
    img_path = process_image(img_url, model_name, article)

    return {
        'model': model_name,
        'category': category_name,
        'subcategory': subcategory_name,
        'schematic': schematic_path if index == 0 else '',
        'part_name': name,
        'article': article,
        'image': img_path
    }


def process_image(img_url, model_name, article):
    """Download the full-size photo of a part into ``IMG_FOLDER``.

    Args:
        img_url: Thumbnail URL from the parts table; the
            ``styles/icon_image/public/`` segment is removed to address the
            original image.
        model_name: Car model name, used in the file name.
        article: Part number, used in the file name.

    Returns:
        The saved file path, ``'Фото нет'`` when there is no photo (empty URL
        or the ``no-photo-big`` placeholder), or ``'Ошибка загрузки'`` when
        the download failed (the error is printed).
    """
    if not img_url or 'no-photo-big' in img_url:
        return 'Фото нет'

    try:
        original_img_url = img_url.replace('styles/icon_image/public/', '')
        response = requests.get(original_img_url, timeout=30)
        # Do not save an HTML error page under a .jpg name.
        response.raise_for_status()
        # Slashes (e.g. the 'N/A' fallback article) would be read as
        # directory separators, so replace them as the schematic name does.
        img_filename = f"{model_name} — {article}.jpg".replace('/', '_')
        img_path = os.path.join(IMG_FOLDER, img_filename)

        with open(img_path, 'wb') as handler:
            handler.write(response.content)
        return img_path
    except Exception as e:
        print(f"Ошибка загрузки изображения {article}: {str(e)[:100]}")
        return 'Ошибка загрузки'


def main():
    """Scrape every model in ``MODELS`` into ``CSV_FILE``.

    For each model the categories, their subcategories and the parts of each
    subcategory are visited in turn; parts are appended to the CSV after
    every subcategory. The browser is always closed at the end.
    """
    driver = init_driver()
    try:
        for model in MODELS:
            print(f"\n{'=' * 50}\nОбработка модели: {model['name']}\n{'=' * 50}")

            categories = get_categories(driver, model['url'])
            if not categories:
                print(f"Не найдено категорий для модели {model['name']}")
                continue

            for category in tqdm(categories,
                                 desc="Категории".ljust(15),
                                 unit="cat",
                                 colour='GREEN'):
                subcategories = get_subcategories(driver, category['url'])

                for subcategory in subcategories:
                    parts = get_parts(driver, subcategory['url'],
                                      category['name'],
                                      subcategory['name'],
                                      model['name'])

                    if parts:
                        save_to_csv(parts, CSV_FILE)

            print(f"\nМодель {model['name']} обработана!")

        print("\nВсе модели успешно обработаны!")

    finally:
        driver.quit()


# Script entry point for the tigerauto.ru scraper.
if __name__ == '__main__':
    main()

import sys
import time
import csv
import idna
import re
import random
import logging
from time import sleep
from bs4 import BeautifulSoup
from urllib.parse import urljoin, quote
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from webdriver_manager.chrome import ChromeDriverManager
import urllib3
import signal

# Silence urllib3's warnings about unverified HTTPS requests.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# All log records go to parser.log.
logging.basicConfig(
    filename='parser.log',
    level=logging.INFO,
    format='%(asctime)s — %(levelname)s — %(message)s'
)

# The site uses a Cyrillic domain, so it is converted to its ASCII
# (punycode) form before URLs are built.
DOMAIN = "китайавто.рус"
PUNYCODE_DOMAIN = idna.encode(DOMAIN).decode('ascii')
BASE_URL = f"https://{PUNYCODE_DOMAIN}"
CAPTCHA_URL = urljoin(BASE_URL, "/captcha/show-captcha")
CHERY_CATALOG_URL = urljoin(BASE_URL, "/catalog/zapcasti-chery")

class ParserState:
    """Remember the URLs currently being parsed.

    The parse functions record the model, category and part URL they are
    working on; ``handle_captcha`` re-opens them after a captcha redirect.
    """

    def __init__(self):
        # Each attribute holds a URL string, or None until first set.
        self.current_model = None
        self.current_category = None
        self.current_part = None

class GracefulExiter:
    """Turn Ctrl+C (SIGINT) into a flag that loops can poll.

    Creating an instance installs a SIGINT handler; after the signal arrives
    ``exit()`` returns ``True`` so the caller can stop at a safe point.
    """

    def __init__(self):
        self.state = False
        signal.signal(signal.SIGINT, self.change_state)

    def change_state(self, signum, frame):
        """Signal handler: announce shutdown and set the exit flag."""
        print("\nЗавершение работы…")
        self.state = True

    def exit(self):
        """Return ``True`` once an interrupt has been requested."""
        return self.state

def init_driver():
    """Create a headless Chrome WebDriver with a 1920x1080 window.

    Returns:
        A ``webdriver.Chrome`` instance whose driver binary is resolved by
        ``ChromeDriverManager``.
    """
    chrome_options = Options()
    # Chrome switches must start with two ASCII hyphens.
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument("--window-size=1920,1080")

    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=chrome_options
    )
    return driver

def handle_captcha(driver, state):
    """Pass the captcha page if the browser was redirected to it.

    When the current URL contains ``CAPTCHA_URL`` the confirmation button is
    clicked, the Chery catalog is re-opened (via its link, or directly when
    the link is absent), and the model, category and part URLs stored in
    *state* are re-opened in that order.

    Args:
        driver: Selenium WebDriver.
        state: ``ParserState`` holding the URLs to return to.

    Returns:
        ``True`` when a captcha page was handled (the caller should retry its
        step), ``False`` when there was no captcha or handling failed; on
        failure a screenshot is saved to ``captcha_error.png``.
    """
    try:
        if CAPTCHA_URL in driver.current_url:
            logging.info("Прохожу капчу…")
            print(f"Прохожу капчу :)")
            submit_button = driver.find_element(By.CSS_SELECTOR, 'button.btn-primary')
            submit_button.click()

            try:
                chery_link = driver.find_element(By.XPATH, '//a[@href="/catalog/zapcasti-chery"]')
                chery_link.click()
            except NoSuchElementException:
                driver.get(CHERY_CATALOG_URL)

            if state.current_model:
                driver.get(state.current_model)
            if state.current_category:
                driver.get(state.current_category)
            if state.current_part:
                driver.get(state.current_part)
            return True
        # No captcha on screen: explicit falsy result instead of implicit None.
        return False
    except Exception as e:
        logging.error(f"Ошибка обработки капчи: {str(e)}")
        driver.save_screenshot("captcha_error.png")
        print(f"Ошибка капчи. Скрин в папке")
        return False

def extract_models(soup):
    """Collect model names and URLs from the catalog page.

    Args:
        soup: Parsed catalog page.

    Returns:
        A list of ``(model_name, model_url)`` tuples, one per ``div`` whose
        class contains ``catalog-item`` and which has a linked
        ``catalog-item-name`` block. Items that raise are logged and skipped.
    """
    models = []
    items = soup.find_all('div', class_=lambda x: x and 'catalog-item' in x)

    for item in items:
        try:
            name_block = item.find('div', class_='catalog-item-name')
            if not name_block:
                continue

            a_tag = name_block.find('a', href=True)
            if a_tag:
                model_name = a_tag.get_text(strip=True)
                model_url = urljoin(BASE_URL, a_tag['href'])
                models.append((model_name, model_url))

        except Exception as e:
            logging.warning(f"Ошибка обработки элемента модели: {str(e)}")

    return models

def extract_categories(soup):
    """Collect category names and URLs from a model page carousel.

    Args:
        soup: Parsed model page.

    Returns:
        A list of ``(category_name, category_url)`` tuples in page order,
        de-duplicated by URL. Only links whose ``href`` contains
        ``/catalog/`` are kept; items that raise are logged and skipped.
    """
    categories = []
    carousel_items = soup.find_all('div', class_='new-catalog__catalog-detail-carousel-item')

    for item in carousel_items:
        try:
            name_div = item.find('div', class_='new-catalog___catalog-detail-carousel-item-name')
            if name_div:
                category_name = name_div.get_text(strip=True)
                a_tag = item.find('a', href=True)
                if a_tag and '/catalog/' in a_tag['href']:
                    cat_url = urljoin(BASE_URL, a_tag['href'])
                    categories.append((category_name, cat_url))
        except Exception as e:
            logging.warning(f"Ошибка обработки элемента категории: {str(e)}")

    # Keep the first occurrence of every URL.
    unique_categories = []
    seen = set()
    for name, url in categories:
        if url not in seen:
            seen.add(url)
            unique_categories.append((name, url))

    return unique_categories

def extract_parts(soup):
    """Extract parts from a category page, taking the number from ``data-articul``.

    Args:
        soup: Parsed category page.

    Returns:
        A list of ``(part_name, part_number, part_url)`` tuples, one per
        ``<li>`` containing a link with both ``href`` and ``data-articul``.
        The name is the link text with its first and last space-separated
        tokens removed and whitespace collapsed.
    """
    parts = []
    items = soup.find_all('li')

    for item in items:
        a_tag = item.find('a', href=True, attrs={"data-articul": True})
        if not a_tag:
            continue

        part_number = a_tag.get('data-articul', '').strip()

        part_name = a_tag.get_text(" ", strip=True)

        # Drop the leading token of the link text.
        if ' ' in part_name:
            part_name = part_name.split(' ', 1)[1].strip()

        # Drop the trailing token of the link text.
        if ' ' in part_name:
            part_name = part_name.rsplit(' ', 1)[0].strip()

        part_name = re.sub(r'\s+', ' ', part_name).strip()

        part_url = urljoin(BASE_URL, a_tag['href'])

        parts.append((part_name, part_number, part_url))

    return parts

def extract_part_details(driver, part_url):
    """Open a part page and return the URL of its image.

    Args:
        driver: Selenium WebDriver used to load the page.
        part_url: URL of the part page.

    Returns:
        The absolute image URL, ``"Нет изображения"`` when the page has no
        image, or ``"Ошибка"`` when loading or parsing raised (logged).
    """
    try:
        driver.get(part_url)
        sleep(0.3)

        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # Option 1: link to the default image.
        image_link = soup.find('a', class_='default-image', href=True)
        if image_link:
            return urljoin(BASE_URL, image_link['href'])

        # Option 2: image placed in the table.
        img_tag = soup.find('img', src=lambda x: x and 'uploads/images' in x)
        if img_tag:
            return urljoin(BASE_URL, img_tag['src'])

        return "Нет изображения"

    except Exception as e:
        logging.error(f"Ошибка получения изображения: {str(e)}")
        return "Ошибка"

def parse_models(driver, state):
    """Load the Chery catalog and return its models.

    If the site redirects to the captcha page, it is handled and the step is
    retried.

    Args:
        driver: Selenium WebDriver.
        state: ``ParserState`` passed on to ``handle_captcha``.

    Returns:
        A list of ``(model_name, model_url)`` tuples.

    Raises:
        Exception: any error is logged and re-raised.
    """
    try:
        driver.get(CHERY_CATALOG_URL)
        if handle_captcha(driver, state):
            return parse_models(driver, state)

        # Scroll to the bottom so the whole list is present in the page source.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        soup = BeautifulSoup(driver.page_source, 'html.parser')
        return extract_models(soup)

    except Exception as e:
        logging.error(f"Ошибка при парсинге моделей: {str(e)}")
        raise

def parse_categories(driver, model_url, state):
    """Load a model page and return its categories.

    The model URL is stored in *state* first; if the site redirects to the
    captcha page, it is handled and the step is retried.

    Args:
        driver: Selenium WebDriver.
        model_url: URL of the model page.
        state: ``ParserState`` to update.

    Returns:
        A list of ``(category_name, category_url)`` tuples.

    Raises:
        Exception: any error is logged and re-raised.
    """
    try:
        state.current_model = model_url
        driver.get(model_url)
        if handle_captcha(driver, state):
            return parse_categories(driver, model_url, state)

        # Scroll to the bottom so the whole carousel is present in the page source.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        soup = BeautifulSoup(driver.page_source, 'html.parser')
        return extract_categories(soup)

    except Exception as e:
        logging.error(f"Ошибка при парсинге категорий: {str(e)}")
        raise

def parse_parts(driver, category_url, state):
    """Load a category page and return its parts.

    The category URL is stored in *state* first; if the site redirects to
    the captcha page, it is handled and the step is retried.

    Args:
        driver: Selenium WebDriver.
        category_url: URL of the category page.
        state: ``ParserState`` to update.

    Returns:
        A non-empty list of ``(part_name, part_number, part_url)`` tuples.

    Raises:
        ValueError: the category contains no parts.
        Exception: any other error; all errors are logged and re-raised.
    """
    try:
        state.current_category = category_url
        driver.get(category_url)
        if handle_captcha(driver, state):
            return parse_parts(driver, category_url, state)

        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        soup = BeautifulSoup(driver.page_source, 'html.parser')
        parts = extract_parts(soup)

        if not parts:
            print("Пустая категория")
            raise ValueError("Пустая категория")

        return parts

    except Exception as e:
        logging.error(f"Ошибка при парсинге запчастей: {str(e)}")
        raise

def parse_part_details(driver, part_url, state):
    """Open a part page and return the URL of its image.

    The part URL is stored in *state* first; if the site redirects to the
    captcha page, it is handled and the step is retried.

    Args:
        driver: Selenium WebDriver.
        part_url: URL of the part page.
        state: ``ParserState`` to update.

    Returns:
        Whatever ``extract_part_details`` returns, or ``"ошибка"`` when this
        function itself raised (the error is logged).
    """
    try:
        state.current_part = part_url
        driver.get(part_url)

        if handle_captcha(driver, state):
            return parse_part_details(driver, part_url, state)

        return extract_part_details(driver, part_url)

    except Exception as e:
        logging.error(f"Ошибка при парсинге деталей: {str(e)}")
        return "ошибка"

def main():
    """Scrape the Chery catalog into ``chery_parts.csv``.

    Models, their categories and the parts of each category are visited in
    turn; one tab-separated row (model, category, part, number, photo URL)
    is written per part number not seen before. Ctrl+C stops the run at the
    next loop check. Errors in one category or model are printed and the
    run continues with the next one. The browser is always closed.
    """
    driver = init_driver()
    state = ParserState()
    grace_exit = GracefulExiter()

    try:
        print("Запуск парсера")
        models = parse_models(driver, state)
        print(f"\nНайдено моделей: {len(models)}")

        with open('chery_parts.csv', 'w', newline='', encoding='utf-8-sig') as file:
            writer = csv.writer(file, delimiter='\t', quoting=csv.QUOTE_MINIMAL)
            writer.writerow(['Модель', 'Категория', 'Запчасть', 'Код', 'Фото'])
            # Part numbers already written, shared across all models.
            processed_articles = set()

            for model_name, model_url in models:
                if grace_exit.exit():
                    break

                print(f"\nНачало обработки модели: {model_name}")

                try:
                    categories = parse_categories(driver, model_url, state)
                    print(f"Найдено категорий: {len(categories)}")

                    for category_name, category_url in categories:
                        if grace_exit.exit():
                            break

                        print(f"\n—————————————Обработка категории: {category_name} для {model_name}")

                        try:
                            parts = parse_parts(driver, category_url, state)
                            print(f"Найдено запчастей: {len(parts)}")

                            for part_name, part_number, part_url in parts:
                                if grace_exit.exit():
                                    break

                                clean_number = part_number.strip()
                                if clean_number in processed_articles:
                                    continue
                                processed_articles.add(clean_number)

                                # parse_part_details takes (driver, part_url, state);
                                # the source passed an extra, undefined ``art``,
                                # which raised NameError on every first part.
                                image_url = parse_part_details(driver, part_url, state)

                                writer.writerow([
                                    model_name.strip(),
                                    category_name.strip(),
                                    part_name.strip(),
                                    clean_number,
                                    image_url.strip() if image_url else "нет фото"
                                ])

                                print(f"з/ч: {part_name} | {clean_number}")

                        except Exception as e:
                            print(f"Ошибка в категории: {str(e)}")
                            continue

                except Exception as e:
                    print(f"Ошибка модели: {str(e)}")
                    continue

        print("\nГотово! Результаты сохранены в chery_parts.csv")

    except Exception as e:
        logging.critical(f"Критическая ошибка: {str(e)}")
    finally:
        print("Работа завершена")
        driver.quit()
        if grace_exit.exit():
            print("Работа прервана пользователем")

# Script entry point for the китайавто.рус scraper.
if __name__ == "__main__":
    main()

import telebot
from telebot import types
import mysql.connector

# The bot token is redacted in the source; replace "*****" with the real one.
bot = telebot.TeleBot("*****")

# In-progress order answers, keyed by chat id.
user_data = {}

@bot.message_handler(commands=['start'])
def send_welcome(message):
    """Answer ``/start`` with a greeting and the three-button main menu."""
    markup = types.ReplyKeyboardMarkup(resize_keyboard=True)
    item1 = types.KeyboardButton('Информация о компании')
    item2 = types.KeyboardButton('Контакты')
    item3 = types.KeyboardButton('Оформить заказ')
    markup.add(item1, item2, item3)

    bot.send_message(message.chat.id, "Здравствуйте! Выберите один из пунктов меню:", reply_markup=markup)

@bot.message_handler(content_types=['text'])
def handle_text(message):
    """Dispatch a menu button press.

    Sends company information or contacts, or starts the order dialogue by
    asking for the user's name; any other text gets a fallback reply.
    """
    # The value was redacted in the source; the sender's chat id is used, as
    # the other branches of this handler already do.
    chat_id = message.chat.id
    if message.text == 'Информация о компании':
        inline_markup = types.InlineKeyboardMarkup()
        # The site URL is redacted in the source; replace "******" with the real one.
        url_button = types.InlineKeyboardButton(text="Сайт компании", url="******")
        inline_markup.add(url_button)
        bot.send_message(message.chat.id, "Компания «АС-****» уже более 10 лет на рынке в сфере грузоперевозок. В нашей компании работают профессионалы, готовые предоставить услуги в области логистики и транспортировки на высочайшем уровне.\n\nНаш адрес: *****", reply_markup=inline_markup)
    elif message.text == 'Контакты':
        bot.send_message(message.chat.id, 'Номер телефона: +7-***-***-**-** \nЭлектронная почта: *****')
    elif message.text == 'Оформить заказ':
        # Start a fresh order for this chat.
        user_data[chat_id] = {}
        msg = bot.send_message(chat_id, 'Введите ваше имя:')
        bot.register_next_step_handler(msg, process_name_step)
    else:
        bot.send_message(chat_id, 'Извините, я не понимаю. Пожалуйста, выберите один из пунктов меню.')

def process_name_step(message):
    """Store the user's name and ask for their e-mail address."""
    # The value was redacted in the source; the sender's chat id keys the order.
    chat_id = message.chat.id
    # setdefault avoids a KeyError if the order dict is missing for this chat.
    user_data.setdefault(chat_id, {})['name'] = message.text
    msg = bot.send_message(chat_id, 'Ваша почта:')
    bot.register_next_step_handler(msg, process_email_step)

def process_email_step(message):
    """Store the user's e-mail address and ask for their phone number."""
    # The value was redacted in the source; the sender's chat id keys the order.
    chat_id = message.chat.id
    # setdefault avoids a KeyError if the order dict is missing for this chat.
    user_data.setdefault(chat_id, {})['email'] = message.text
    msg = bot.send_message(chat_id, 'Ваш номер телефона:')
    bot.register_next_step_handler(msg, process_phone_step)

def process_phone_step(message):
    """Store the user's phone number and ask for a comment."""
    # The value was redacted in the source; the sender's chat id keys the order.
    chat_id = message.chat.id
    # setdefault avoids a KeyError if the order dict is missing for this chat.
    user_data.setdefault(chat_id, {})['phone'] = message.text
    msg = bot.send_message(chat_id, 'Комментарий:')
    bot.register_next_step_handler(msg, process_comment_step)

def process_comment_step(message):
    """Finish the order: thank the user, forward the order and store it in MySQL.

    The comment is stored, the user is thanked, the order summary is sent to
    the notification chat, and a row is inserted into ``Orders_Avto``.
    Database errors propagate to the caller, but the cursor and connection
    are always closed and the chat's order state is cleared.
    """
    # The value was redacted in the source; the sender's chat id keys the order.
    chat_id = message.chat.id
    order = user_data.setdefault(chat_id, {})
    order['comment'] = message.text
    bot.send_message(chat_id, 'Спасибо за заказ, ожидайте обратной связи')

    # Missing answers become empty strings instead of raising KeyError.
    name = order.get('name', '')
    email = order.get('email', '')
    phone = order.get('phone', '')
    comment = order.get('comment', '')

    order_info = f"Новый заказ через бота в телеграм:\nИмя: {name}\nПочта: {email}\nТелефон: {phone}\nКомментарий: {comment}"
    # The recipient chat id is redacted in the source; replace "*****" with the real one.
    bot.send_message("*****", order_info)

    add_order = ("INSERT INTO Orders_Avto "
                 "(IdOrder, NAME, Email, Phone, Comment) "
                 "VALUES (NULL, %s, %s, %s, %s)")
    data_order = (name, email, phone, comment)

    try:
        # Connection settings are redacted in the source.
        cnx = mysql.connector.connect(user='*****', password='*****', host='*****', database='*****')
        try:
            cursor = cnx.cursor()
            try:
                # Values are passed as parameters, never formatted into the SQL.
                cursor.execute(add_order, data_order)
                cnx.commit()
            finally:
                cursor.close()
        finally:
            cnx.close()
    finally:
        # The dialogue is over; drop the collected answers for this chat.
        user_data.pop(chat_id, None)

# Start polling for incoming updates so the handlers above are invoked.
bot.polling()