2 Commits

117 changed files with 3993 additions and 70 deletions
+1
View File
@@ -1,6 +1,7 @@
__pycache__/
*.pyc
.venv/
.env
CONTEXT.md
CONTEXT.local.md
backups/
+9 -1
View File
@@ -6,8 +6,16 @@ services:
SECRET_KEY: ${SECRET_KEY:-change-me-please}
WEB_CONCURRENCY: ${WEB_CONCURRENCY:-4}
GUNICORN_THREADS: ${GUNICORN_THREADS:-5}
TELEGRAM_BOT_TOKEN: ${TELEGRAM_BOT_TOKEN:-}
TELEGRAM_CHAT_ID: ${TELEGRAM_CHAT_ID:-}
NEWS_API_TOKEN: ${NEWS_API_TOKEN:-}
TZ: ${TZ:-Europe/Moscow}
volumes:
- ./matrix.db:/app/matrix.db
- ./static/news_images:/app/static/news_images
- ./static/events_images:/app/static/events_images
- ./static/css:/app/static/css
- ./templates:/app/templates
restart: unless-stopped
nginx:
@@ -16,7 +24,7 @@ services:
depends_on:
- app
ports:
- "5000:80"
- "5002:80"
volumes:
- ./nginx.conf:/etc/nginx/conf.d/default.conf:ro
restart: unless-stopped
BIN
View File
Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

After

Width:  |  Height:  |  Size: 32 KiB

+717
View File
@@ -0,0 +1,717 @@
#!/usr/bin/env python3
"""
Парсер новостей с mont.ru → публикует в ZKART БД.
Запуск: python3 mont_scraper.py [--all10]
"""
import datetime
import json
import os
import re
import secrets
import sqlite3
import sys
import time
from html import escape, unescape
from http.cookiejar import CookieJar
from urllib.parse import quote, urlencode, urljoin, urlparse
from urllib.request import HTTPCookieProcessor, OpenerDirector, Request, build_opener, urlopen
DB_PATH = "/home/ruslan/docker/ZKART#/matrix.db"
IMG_DIR = "/home/ruslan/docker/ZKART#/static/news_images"
BASE_URL = "https://www.mont.ru"
LIST_URL = "https://www.mont.ru/ru-ru/news?period=1"
SITE_BASE = "https://maps.4mont.ru"
TG_TOKEN = "8181219074:AAGvqWqb6t10YP4xpMOQnBq_6LrUqAFm5hM"
TG_CHAT_ID = "54986411"
MONT_EMAIL = "rgalyaviev@mont.com"
MONT_PASS = "utOgbZ09mont"
HEADERS = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36",
"Accept": "text/html,application/xhtml+xml,*/*;q=0.9"}
os.makedirs(IMG_DIR, exist_ok=True)
# ── Auth ──────────────────────────────────────────────────────────────────────
def make_authenticated_opener() -> build_opener:
"""Login to mont.ru via OIDC and return an opener with auth cookies."""
jar = CookieJar()
opener = build_opener(HTTPCookieProcessor(jar))
# Step 1: GET login → redirected to passport.mont.ru
req = Request(f"{BASE_URL}/ru-ru/account/login", headers=HEADERS)
with opener.open(req, timeout=20) as r:
html = r.read().decode("utf-8", errors="replace")
login_url = r.url
form_action = re.search(r'<form[^>]+action="([^"]+)"', html)
xsrf_m = re.search(r'name="idsrv\.xsrf"[^>]+value="([^"]+)"', html)
if not form_action or not xsrf_m:
raise RuntimeError("Login form not found")
parsed = urlparse(login_url)
action_url = f"{parsed.scheme}://{parsed.netloc}{form_action.group(1)}"
# Step 2: POST credentials
post_data = urlencode({
"username": MONT_EMAIL, "password": MONT_PASS, "idsrv.xsrf": xsrf_m.group(1)
}).encode()
req2 = Request(action_url, data=post_data,
headers={**HEADERS, "Content-Type": "application/x-www-form-urlencoded",
"Referer": login_url},
method="POST")
with opener.open(req2, timeout=20) as r:
html2 = r.read().decode("utf-8", errors="replace")
final_url = r.url
# Step 3: form_post with id_token back to www.mont.ru
form_action2 = re.search(r'<form[^>]+action="([^"]+)"', html2)
if form_action2:
action2 = form_action2.group(1)
hidden = re.findall(r'<input[^>]+type="hidden"[^>]+name="([^"]+)"[^>]+value="([^"]*)"', html2)
if not hidden:
hidden = re.findall(r'<input[^>]+name="([^"]+)"[^>]+type="hidden"[^>]+value="([^"]*)"', html2)
post_data3 = urlencode(dict(hidden)).encode()
req3 = Request(action2, data=post_data3,
headers={**HEADERS, "Content-Type": "application/x-www-form-urlencoded",
"Referer": final_url},
method="POST")
with opener.open(req3, timeout=20) as r:
r.read()
return opener
# ── Helpers ───────────────────────────────────────────────────────────────────
def tg_notify(text: str):
try:
payload = json.dumps({"chat_id": TG_CHAT_ID, "text": text, "parse_mode": "HTML"}).encode()
req = Request(f"https://api.telegram.org/bot{TG_TOKEN}/sendMessage",
data=payload,
headers={"Content-Type": "application/json"},
method="POST")
with urlopen(req, timeout=10):
pass
except Exception as e:
print(f" [WARN] Telegram notify failed: {e}")
def strip_tags(html):
return unescape(re.sub(r"<[^>]+>", "", html)).strip()
ALLOWED_TAGS = re.compile(
r'<(/?)('
r'p|br|strong|b|em|i|u|s|ul|ol|li|a|h2|h3|h4|h5|blockquote|table|thead|tbody|tr|td|th'
r')(\b[^>]*)?>', re.IGNORECASE
)
ALLOWED_ATTRS = re.compile(r'\s+(href|target|rel)="([^"]*)"', re.IGNORECASE)
DANGEROUS_PROTOCOLS = re.compile(r'^(javascript|vbscript|data):', re.IGNORECASE)
def sanitize_html(html_body: str) -> str:
"""Keep formatting tags (bold, links, lists etc.) but strip everything unsafe."""
# Remove script/style blocks entirely
html_body = re.sub(r'<(script|style)[^>]*>.*?</\1>', '', html_body, flags=re.IGNORECASE | re.DOTALL)
# Remove HTML comments
html_body = re.sub(r'<!--.*?-->', '', html_body, flags=re.DOTALL)
result = []
pos = 0
for m in re.finditer(r'<[^>]+>', html_body):
# Text before this tag — escape it
result.append(unescape(html_body[pos:m.start()]))
pos = m.end()
tag = m.group(0)
tag_m = ALLOWED_TAGS.match(tag)
if not tag_m:
continue # strip unknown/dangerous tags
slash, name, attrs_raw = tag_m.group(1), tag_m.group(2).lower(), tag_m.group(3) or ""
if slash: # closing tag
result.append(f'</{name}>')
continue
# Build safe attribute string
safe_attrs = ""
if name == "a":
href_m = re.search(r'\bhref="([^"]*)"', attrs_raw, re.IGNORECASE)
if href_m:
href = href_m.group(1)
if not DANGEROUS_PROTOCOLS.match(href.strip()):
# Make relative mont.ru links absolute
if href.startswith("/"):
href = "https://www.mont.ru" + href
safe_attrs = f' href="{href}" target="_blank" rel="noopener"'
if name in ("br",):
result.append(f'<{name} />')
else:
result.append(f'<{name}{safe_attrs}>')
result.append(unescape(html_body[pos:]))
return "".join(result).strip()
def download_image(opener, img_src: str):
"""Download image from mont.ru, return local relative path or None."""
try:
from urllib.parse import quote
safe_path = quote(img_src, safe="/:.-_") if img_src.startswith("/") else img_src
url = BASE_URL + safe_path if img_src.startswith("/") else safe_path
ext = os.path.splitext(img_src.split("?")[0])[1].lower() or ".png"
if ext not in (".jpg", ".jpeg", ".png", ".webp", ".gif"):
ext = ".png"
fname = f"news_{secrets.token_hex(8)}{ext}"
path = os.path.join(IMG_DIR, fname)
req = Request(url, headers=HEADERS)
with opener.open(req, timeout=15) as resp:
with open(path, "wb") as f:
f.write(resp.read())
return f"news_images/{fname}"
except Exception as e:
print(f" [WARN] Image download failed: {e}")
return None
def slug_from(title, slug_id):
slug = re.sub(r"[^a-z0-9а-яё]+", "-", title.lower())
slug = re.sub(r"[а-яё]", "", slug)
slug = slug.strip("-")[:50] or f"mont-news-{slug_id}"
return f"{slug}-{slug_id}"
# ── News listing ──────────────────────────────────────────────────────────────
def get_news_ids_from_listing(opener) -> tuple[list[str], dict[str, str]]:
"""Return (list of IDs, dict of id→img_src) from the listing page."""
req = Request(LIST_URL, headers=HEADERS)
with opener.open(req, timeout=20) as r:
html = r.read().decode("utf-8", errors="replace")
# Pair images with the nearest following news link (within 2000 chars)
imgs = [(m.start(), m.group(1)) for m in re.finditer(r'src="(/Content/Images/[^"]+)"', html)]
links = [(m.start(), m.group(1)) for m in re.finditer(r'href="/ru-ru/news/(\d+)"', html)]
id_to_img = {}
for img_pos, img_src in imgs:
for link_pos, art_id in links:
if link_pos > img_pos and link_pos - img_pos < 2000:
if art_id not in id_to_img:
id_to_img[art_id] = img_src
break
# Full ordered list of IDs
ids = list(dict.fromkeys(art_id for _, art_id in links))
return ids, id_to_img
def get_max_slug_id() -> int:
"""Return the highest mont.ru article ID already in our DB."""
try:
conn = sqlite3.connect(DB_PATH, timeout=10)
rows = conn.execute("SELECT slug FROM news ORDER BY id DESC LIMIT 50").fetchall()
conn.close()
ids = []
for (slug,) in rows:
m = re.search(r"-(\d{4,})$", slug)
if m:
ids.append(int(m.group(1)))
return max(ids) if ids else 0
except Exception:
return 0
def is_already_saved(slug_id: str) -> bool:
conn = sqlite3.connect(DB_PATH, timeout=10)
row = conn.execute("SELECT id FROM news WHERE slug LIKE ?", (f"%-{slug_id}",)).fetchone()
conn.close()
return row is not None
# ── Fetch & save one article ──────────────────────────────────────────────────
def fetch_and_save_article(opener, slug_id: str, listing_img: str = "") -> tuple[bool, str, str]:
"""
Fetch article from API, save to DB.
Returns (saved: bool, title: str, slug: str)
"""
if is_already_saved(slug_id):
print(f" [SKIP] Already exists: {slug_id}")
return False, "", ""
# Fetch article data via authenticated API
api_url = f"{BASE_URL}/ru-ru/apiMvc/news/{slug_id}"
req = Request(api_url, headers={**HEADERS, "Accept": "application/json, text/plain, */*"})
try:
with opener.open(req, timeout=20) as r:
data = json.loads(r.read().decode("utf-8", errors="replace"))
except Exception as e:
print(f" [WARN] API fetch failed for {slug_id}: {e}")
return False, "", ""
title = strip_tags(data.get("title", "")).strip()
text_html = data.get("text", "") or ""
body = sanitize_html(text_html)
if not title or len(title) < 5:
print(f" [SKIP] No title for {slug_id}")
return False, "", ""
# Check not a 404 page
if "страница не найдена" in title.lower() or "404" in title:
print(f" [SKIP] 404 page for {slug_id}")
return False, "", ""
print(f" [FETCH] {title[:70]}...")
# Image: prefer listing image (most reliable), then API fields, then article page
img_src = listing_img or data.get("image") or data.get("img") or data.get("previewImage") or ""
image_path = None
if img_src:
image_path = download_image(opener, img_src)
if not image_path:
# Try scraping the article HTML page for an image
try:
req2 = Request(f"{BASE_URL}/ru-ru/news/{slug_id}", headers=HEADERS)
with opener.open(req2, timeout=15) as r:
pg = r.read().decode("utf-8", errors="replace")
img_m = re.search(r'src="(/Content/Images/[^"]+)"', pg)
if img_m:
image_path = download_image(opener, img_m.group(1))
except Exception:
pass
slug = slug_from(title, slug_id)
created_at = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
conn = sqlite3.connect(DB_PATH, timeout=15)
try:
conn.execute(
"INSERT INTO news(title, body, slug, image, published, created_at) VALUES (?,?,?,?,1,?)",
(title, body, slug, image_path, created_at)
)
conn.commit()
print(f" [OK] Published: {title[:70]}")
except sqlite3.IntegrityError:
slug = f"{slug}-{secrets.token_hex(3)}"
conn.execute(
"INSERT INTO news(title, body, slug, image, published, created_at) VALUES (?,?,?,?,1,?)",
(title, body, slug, image_path, created_at)
)
conn.commit()
print(f" [OK] Published (alt slug): {title[:70]}")
finally:
conn.close()
time.sleep(0.5)
return True, title, slug
# ── Main ──────────────────────────────────────────────────────────────────────
def main():
all10 = "--all10" in sys.argv
print(f"[{datetime.datetime.now():%Y-%m-%d %H:%M}] Logging in to mont.ru...")
errors = []
saved_count = 0
published = []
try:
opener = make_authenticated_opener()
except Exception as e:
msg = f"Ошибка авторизации на mont.ru: {e}"
print(f"Auth error: {e}")
tg_notify(f"🚨 <b>MONT парсер</b>\n{msg}")
return
try:
print("Fetching news listing...")
listing_ids, id_to_img = get_news_ids_from_listing(opener)
known_max = get_max_slug_id()
probe_ids = [str(i) for i in range(known_max + 1, known_max + 6)]
all_ids = list(dict.fromkeys(listing_ids + probe_ids))
if all10:
candidate_ids = all_ids[:15]
else:
candidate_ids = [sid for sid in all_ids if not is_already_saved(sid)]
if candidate_ids:
print(f"Candidates: {candidate_ids}")
for sid in candidate_ids:
ok, title, slug = fetch_and_save_article(opener, sid, listing_img=id_to_img.get(sid, ""))
if ok:
saved_count += 1
published.append((title, slug))
if saved_count > 0:
_, refreshed_imgs = get_news_ids_from_listing(opener)
conn = sqlite3.connect(DB_PATH, timeout=15)
for sid in candidate_ids:
img_src = refreshed_imgs.get(sid)
if img_src:
row = conn.execute(
"SELECT id, image FROM news WHERE slug LIKE ?", (f"%-{sid}",)
).fetchone()
if row and not row[1]:
path = download_image(opener, img_src)
if path:
conn.execute("UPDATE news SET image=? WHERE id=?", (path, row[0]))
conn.commit()
conn.close()
else:
print("No new news.")
except Exception as e:
msg = f"Ошибка парсинга новостей: {e}"
print(f"News error: {e}")
errors.append(msg)
print(f"Done. News saved: {saved_count}")
# Hide outdated events
hidden_count = hide_outdated_events()
if hidden_count:
print(f"Hidden outdated events: {hidden_count}")
# Scrape events
ev_count, ev_published, ev_error = scrape_events(opener)
if ev_error:
errors.append(ev_error)
# Telegram: send only if something new OR errors
tg_lines = []
if saved_count > 0:
suffix = "ь" if saved_count == 1 else "и" if 2 <= saved_count <= 4 else "ей"
tg_lines.append(f"✅ <b>Новости</b>: {saved_count} новост{suffix}:")
for title, slug in published:
tg_lines.append(f' • <a href="{SITE_BASE}/news/{slug}">{title}</a>')
if ev_count > 0:
suffix = "е" if ev_count == 1 else "я" if 2 <= ev_count <= 4 else "й"
tg_lines.append(f"📅 <b>Мероприятия</b>: {ev_count} мероприяти{suffix}:")
for title, slug in ev_published:
tg_lines.append(f' • <a href="{SITE_BASE}/events/{slug}">{title}</a>')
for err in errors:
tg_lines.append(f"🚨 {err}")
if tg_lines:
tg_notify("\n".join(tg_lines))
# ── Events scraper ────────────────────────────────────────────────────────────
EVENTS_LIST_URL = "https://www.mont.ru/ru-ru/events?eventPeriod=1"
EVENTS_IMAGES_DIR = "/home/ruslan/docker/ZKART#/static/events_images"
os.makedirs(EVENTS_IMAGES_DIR, exist_ok=True)
def parse_event_date(raw: str) -> str | None:
"""Parse various date formats to YYYY-MM-DD, return None if unparseable."""
if not raw:
return None
raw = raw.strip()
# ISO format
m = re.match(r"(\d{4})-(\d{2})-(\d{2})", raw)
if m:
return f"{m.group(1)}-{m.group(2)}-{m.group(3)}"
# DD.MM.YYYY or DD/MM/YYYY
m = re.match(r"(\d{1,2})[./](\d{1,2})[./](\d{4})", raw)
if m:
return f"{m.group(3)}-{m.group(2).zfill(2)}-{m.group(1).zfill(2)}"
# D Month YYYY (Russian)
months_ru = {"января":"01","февраля":"02","марта":"03","апреля":"04","мая":"05","июня":"06",
"июля":"07","августа":"08","сентября":"09","октября":"10","ноября":"11","декабря":"12"}
m = re.match(r"(\d{1,2})\s+([а-яё]+)\s+(\d{4})", raw.lower())
if m:
mon = months_ru.get(m.group(2))
if mon:
return f"{m.group(3)}-{mon}-{m.group(1).zfill(2)}"
return None
def download_event_image(opener, img_src: str) -> str | None:
try:
from urllib.parse import quote
safe_path = quote(img_src, safe="/:.-_") if img_src.startswith("/") else img_src
url = BASE_URL + safe_path if img_src.startswith("/") else safe_path
ext = os.path.splitext(img_src.split("?")[0])[1].lower() or ".png"
if ext not in (".jpg", ".jpeg", ".png", ".webp", ".gif"):
ext = ".png"
fname = f"event_{secrets.token_hex(8)}{ext}"
path = os.path.join(EVENTS_IMAGES_DIR, fname)
req = Request(url, headers=HEADERS)
with opener.open(req, timeout=15) as resp:
with open(path, "wb") as f:
f.write(resp.read())
return f"events_images/{fname}"
except Exception as e:
print(f" [WARN] Event image download failed: {e}")
return None
def get_event_ids_from_listing(opener) -> tuple[list[str], dict]:
"""Use JSON API to get all upcoming events — returns more than the HTML listing."""
import json as _json
api_url = "https://www.mont.ru/ru-ru/apiMvc/events?eventPeriod=1&perPageCount=100"
req = Request(api_url, headers=HEADERS)
with opener.open(req, timeout=20) as r:
data = _json.loads(r.read().decode("utf-8", errors="replace"))
ids = []
id_to_img = {}
id_to_date = {}
for ev in data.get("events", []):
eid = str(ev.get("eventId", ""))
if not eid:
continue
ids.append(eid)
img = ev.get("backgroundImageUrl", "")
if img:
id_to_img[eid] = img
start = ev.get("start", "")
if start:
id_to_date[eid] = start[:10] # "2026-06-09T10:00:00" → "2026-06-09"
return ids, id_to_img, id_to_date
def fetch_and_save_event(opener, eid: str, listing_img: str = "", listing_date: str = "") -> tuple[bool, str, str]:
from zkart_db_shim import is_event_saved, create_event
if is_event_saved(eid):
print(f" [SKIP] Event already exists: {eid}")
return False, "", ""
# Try API first
api_url = f"{BASE_URL}/ru-ru/apiMvc/events/{eid}"
req = Request(api_url, headers={**HEADERS, "Accept": "application/json, text/plain, */*"})
data = {}
try:
with opener.open(req, timeout=20) as r:
data = json.loads(r.read().decode("utf-8", errors="replace"))
except Exception:
pass
title = strip_tags(data.get("title", "") or data.get("name", "")).strip()
body_html = data.get("text", "") or data.get("description", "") or ""
body = sanitize_html(body_html)
# Fallback: scrape article page
if not title:
try:
req2 = Request(f"{BASE_URL}/ru-ru/events/{eid}", headers=HEADERS)
with opener.open(req2, timeout=20) as r:
pg = r.read().decode("utf-8", errors="replace")
h1 = re.search(r'<h1[^>]*>(.*?)</h1>', pg, re.DOTALL)
if h1:
title = strip_tags(h1.group(1)).strip()
if not body:
content_m = re.search(r'<div[^>]+class="[^"]*content[^"]*"[^>]*>(.*?)</div>', pg, re.DOTALL | re.IGNORECASE)
if content_m:
body = sanitize_html(content_m.group(1))
# Try to get date from page
if not listing_date:
dm = re.search(r'(\d{1,2}[./]\d{1,2}[./]\d{4}|\d{1,2}\s+[а-яё]+\s+\d{4})', pg, re.IGNORECASE)
if dm:
listing_date = parse_event_date(dm.group(1)) or ""
except Exception as e:
print(f" [WARN] Event page fetch failed: {e}")
if not title or len(title) < 4:
print(f" [SKIP] No title for event {eid}")
return False, "", ""
print(f" [FETCH] Event: {title[:70]}...")
# Date
event_date = listing_date
if not event_date:
for field in ("date", "startDate", "start_date", "eventDate", "dateStart"):
raw = data.get(field, "")
if raw:
event_date = parse_event_date(str(raw)) or ""
if event_date:
break
if not event_date:
event_date = datetime.date.today().strftime("%Y-%m-%d")
# Image
img_src = listing_img or data.get("image") or data.get("img") or data.get("previewImage") or ""
image_path = None
if img_src:
image_path = download_event_image(opener, img_src)
slug_base = slug_from(title, eid)
conn = sqlite3.connect(DB_PATH, timeout=15)
try:
conn.execute(
"INSERT INTO events(title, body, slug, image, event_date, published) VALUES (?,?,?,?,?,1)",
(title, body, slug_base, image_path, event_date)
)
conn.commit()
print(f" [OK] Event saved: {title[:60]} ({event_date})")
except sqlite3.IntegrityError:
slug_base = f"{slug_base}-{secrets.token_hex(3)}"
conn.execute(
"INSERT INTO events(title, body, slug, image, event_date, published) VALUES (?,?,?,?,?,1)",
(title, body, slug_base, image_path, event_date)
)
conn.commit()
finally:
conn.close()
time.sleep(0.4)
return True, title, slug_base
def hide_outdated_events() -> int:
"""Set published=0 for events where event_date <= today."""
conn = sqlite3.connect(DB_PATH, timeout=10)
cur = conn.execute(
"UPDATE events SET published=0 WHERE published=1 AND event_date <= date('now','localtime')"
)
count = cur.rowcount
conn.commit()
conn.close()
return count
def parse_event_page(html: str) -> dict:
"""Extract body, register_url, image_src from events-details page HTML."""
import re as _re
from html import unescape as _u
# Description: events-details__about block
body = ""
about_m = _re.search(
r'class="events-details__about[^"]*"[^>]*>.*?<div[^>]*>(.*?)</div>\s*</div>\s*</div>',
html, _re.DOTALL
)
if about_m:
body = sanitize_html(about_m.group(1))
# Registration URL
reg_m = _re.search(r'class="[^"]*register-btn[^"]*"[^>]+href="([^"]+)"', html, _re.IGNORECASE)
_raw_reg = reg_m.group(1) if reg_m else ""
if _raw_reg.startswith("/"):
_raw_reg = "https://www.mont.ru" + _raw_reg
register_url = _raw_reg
# Cover background image
cover_m = _re.search(r'events-details__background[^>]+style="background-image:\s*url\(&quot;([^&]+)&quot;\)', html)
img_src = cover_m.group(1) if cover_m else ""
# Fallback: vendor logo
if not img_src:
logo_m = _re.search(r'events-details__logo[^>]*>.*?<img[^>]+src="([^"]+)"', html, _re.DOTALL)
if logo_m:
img_src = logo_m.group(1)
# Fallback: any /Content/Images
if not img_src:
ci_m = _re.search(r'src="(/Content/Images/[^"]+)"', html)
if ci_m:
img_src = ci_m.group(1)
# Date from events-details__dates
date_m = _re.search(r'events-details__dates[^>]*>.*?(\d{1,2}\.\d{2}\.\d{4})', html, _re.DOTALL)
date_str = parse_event_date(date_m.group(1)) if date_m else ""
return {"body": body, "register_url": register_url, "img_src": img_src, "date_str": date_str}
def scrape_events(opener=None):
print(f"[{datetime.datetime.now():%Y-%m-%d %H:%M}] Scraping events...")
def is_event_saved(eid):
conn = sqlite3.connect(DB_PATH, timeout=10)
row = conn.execute("SELECT id FROM events WHERE slug LIKE ?", (f"%-{eid}",)).fetchone()
conn.close()
return row is not None
def save_event(title, body, slug, image_path, event_date, register_url):
conn = sqlite3.connect(DB_PATH, timeout=15)
try:
conn.execute(
"INSERT INTO events(title, body, slug, image, event_date, published, register_url) VALUES (?,?,?,?,?,1,?)",
(title, body, slug, image_path, event_date, register_url)
)
conn.commit()
return slug
except sqlite3.IntegrityError:
s2 = f"{slug}-{secrets.token_hex(3)}"
conn.execute(
"INSERT INTO events(title, body, slug, image, event_date, published, register_url) VALUES (?,?,?,?,?,1,?)",
(title, body, s2, image_path, event_date, register_url)
)
conn.commit()
return s2
finally:
conn.close()
if opener is None:
try:
opener = make_authenticated_opener()
except Exception as e:
msg = f"Ошибка авторизации (events): {e}"
print(f" Auth error: {e}")
return 0, [], msg
try:
ids, id_to_img, id_to_date = get_event_ids_from_listing(opener)
except Exception as e:
msg = f"Ошибка листинга мероприятий: {e}"
print(f" Listing error: {e}")
return 0, [], msg
candidates = [eid for eid in ids if not is_event_saved(eid)]
if not candidates:
print(" No new events.")
return 0, [], None
print(f" Event candidates: {candidates}")
saved_count = 0
published = []
for eid in candidates:
# Fetch full event page HTML (contains all data)
try:
req = Request(f"{BASE_URL}/ru-ru/events/{eid}", headers=HEADERS)
with opener.open(req, timeout=20) as r:
pg = r.read().decode("utf-8", errors="replace")
except Exception as e:
print(f" [WARN] Could not fetch event page {eid}: {e}")
continue
parsed = parse_event_page(pg)
body = parsed["body"]
register_url = parsed["register_url"]
img_src = parsed["img_src"] or id_to_img.get(eid, "")
event_date = parsed["date_str"] or id_to_date.get(eid, "")
# Title from h1
h1_m = re.search(r'<h1[^>]*>(.*?)</h1>', pg, re.DOTALL)
title = strip_tags(h1_m.group(1)).strip() if h1_m else ""
if not title:
title = strip_tags(re.search(r'events-details__title[^>]*>(.*?)</[^>]+>', pg, re.DOTALL).group(1)).strip() if re.search(r'events-details__title[^>]*>(.*?)</[^>]+>', pg, re.DOTALL) else ""
if not title or len(title) < 4:
print(f" [SKIP] No title for event {eid}")
continue
if "страница не найдена" in title.lower() or "404" in title:
print(f" [SKIP] 404 for event {eid}")
continue
if not body:
body = title # at minimum use title as body
if not event_date:
event_date = datetime.date.today().strftime("%Y-%m-%d")
# Download image
image_path = None
if img_src:
image_path = download_event_image(opener, img_src)
slug = slug_from(title, eid)
final_slug = save_event(title, body, slug, image_path, event_date, register_url)
print(f" [OK] Event: {title[:60]} ({event_date}){' +reg' if register_url else ''}")
saved_count += 1
published.append((title, final_slug))
time.sleep(0.4)
return saved_count, published, None
if __name__ == "__main__":
main()
+1 -1
View File
@@ -6,7 +6,7 @@ server {
location / {
proxy_pass http://app:8000;
proxy_set_header Host $host;
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
+137 -2
View File
@@ -1,7 +1,7 @@
:root { --b:#1f4ea3; --line:#cfe0ff; }
* { box-sizing: border-box; }
body { margin:0; font-family:Manrope,sans-serif; background:#f0f5ff; color:#1a2746; }
body.ib { background:#fff1f1; }
body.ib { background:#eefaf3; }
.wrap { width:min(1600px, calc(100% - 24px)); margin:12px auto 24px; }
.top {
background: linear-gradient(130deg, #1f4ea3, #3977df);
@@ -13,7 +13,7 @@
align-items:center;
gap:10px;
}
body.ib .top { background: linear-gradient(130deg, #9b2f3a, #c24a56); }
body.ib .top { background: linear-gradient(130deg, #1f7a4a, #37a96b); }
.scope-switch { display:flex; gap:8px; }
.scope-chip {
display:inline-block;
@@ -26,6 +26,9 @@
border:1px solid #ccdcff;
}
.scope-chip.active { background:#fff; color:#112847; }
.top-actions { display:flex; gap:8px; align-items:center; }
.top-actions a,
.top-actions form { margin:0; }
.grid { display:grid; grid-template-columns: repeat(auto-fit, minmax(280px, 1fr)); gap:10px; margin:12px 0; }
.box {
background:#fff;
@@ -54,12 +57,44 @@
.pri { background:#1f4ea3; color:#fff; }
.warn { background:#e8eefc; color:#223963; }
.danger { background:#ffefef; color:#8e1d1d; }
.alerts { display:grid; gap:8px; margin:12px 0; }
.alert { border-radius:10px; padding:10px 12px; font-weight:700; border:1px solid #cfe0ff; background:#fff; }
.alert.ok { color:#14532d; background:#ecfdf3; border-color:#bde9cb; }
.alert.error { color:#8e1d1d; background:#ffefef; border-color:#ffcaca; }
.lists { display:grid; grid-template-columns:1fr 1fr 1fr; gap:10px; margin-bottom:10px; }
.list-box { max-height: 430px; overflow-y: auto; padding-right: 4px; }
.list-box::-webkit-scrollbar { width:12px; }
.list-box::-webkit-scrollbar-thumb { background:#bfd4ff; border-radius:10px; }
.list-item { display:flex; justify-content:space-between; align-items:center; gap:8px; border:1px solid var(--line); border-radius:10px; padding:6px 8px; margin-bottom:6px; background:#fff; min-height: 36px; }
.product-item { border:1px solid var(--line); border-radius:10px; padding:6px 8px; margin-bottom:6px; background:#fff; }
.product-row { display:flex; justify-content:space-between; align-items:center; gap:8px; min-height:36px; }
.product-row span { min-width:0; overflow-wrap:anywhere; }
.product-actions { display:flex; gap:8px; align-items:center; flex-shrink:0; }
.product-actions form { margin:0; }
.product-edit { display:grid; grid-template-columns: minmax(120px, 1fr) minmax(150px, 1.2fr) auto; gap:8px; margin-top:8px; }
.product-edit[hidden] { display:none; }
.pending-box { margin:12px 0; }
.pending-head { display:flex; justify-content:space-between; align-items:center; gap:10px; margin-bottom:10px; }
.pending-head h3 { margin:0; }
.pending-head form { margin:0; }
.pending-list { display:grid; gap:8px; }
.pending-item { display:grid; grid-template-columns:minmax(0, 1fr) auto; gap:10px; align-items:start; border:1px solid var(--line); border-radius:10px; padding:8px; }
.pending-item span { display:block; color:#49638f; font-size:12px; margin-top:2px; }
.pending-desc { margin-top:8px; padding:8px; border-radius:8px; background:#f6f9ff; white-space:pre-wrap; font-size:13px; line-height:1.45; }
.pending-actions { display:flex; gap:8px; }
.pending-actions form { margin:0; }
.admin-users-box { margin:12px 0; }
.created-admin-card { display:grid; gap:6px; margin:0 0 10px; padding:10px 12px; border:1px solid #9fd7b1; border-radius:10px; background:#ecfdf3; color:#14532d; }
.created-admin-card code { display:inline-block; padding:3px 6px; border-radius:6px; background:#fff; color:#102a1a; font-weight:800; }
.created-admin-share { display:grid; grid-template-columns:minmax(0, 1fr) auto; gap:8px; align-items:stretch; margin-top:4px; }
.created-admin-share textarea { width:100%; min-height:84px; resize:vertical; padding:9px 10px; border:1px solid #9fd7b1; border-radius:9px; font:700 13px/1.45 Manrope,sans-serif; color:#102a1a; background:#fff; }
.admin-create { display:grid; grid-template-columns:minmax(160px, 1fr) auto auto auto; gap:8px; align-items:center; }
.admin-create label { display:flex; align-items:center; gap:6px; font-weight:700; white-space:nowrap; }
.admin-users-list { display:grid; gap:6px; margin-top:10px; }
.admin-user-item { display:flex; justify-content:space-between; align-items:center; gap:8px; border:1px solid var(--line); border-radius:10px; padding:7px 8px; }
.admin-user-item span { display:grid; gap:2px; }
small { color:#60759d; font-size:11px; }
.matrix-wrap { background:#fff; border:1px solid #d4e3ff; border-radius:12px; padding:10px; }
.matrix-scroll { overflow:auto; max-height:72vh; border:1px solid #dce7ff; border-radius:10px; }
.matrix-scroll::-webkit-scrollbar,
@@ -75,3 +110,103 @@
th:first-child { z-index: 3; }
td input { transform: scale(1.05); }
.matrix-tip { margin:0 0 6px; font-size:12px; color:#37507d; }
@media (max-width: 980px) {
.wrap {
width: calc(100% - 16px);
margin: 8px auto 16px;
}
.top {
flex-direction: column;
align-items: stretch;
padding: 12px;
}
.top-actions {
width: 100%;
display: grid !important;
grid-template-columns: 1fr 1fr 1fr;
}
.top-actions a,
.top-actions form,
.top-actions button {
width: 100%;
}
.scope-switch {
flex-wrap: wrap;
}
.scope-chip {
flex: 1 1 auto;
text-align: center;
}
.grid {
grid-template-columns: 1fr;
}
.lists {
grid-template-columns: 1fr;
}
.list-box {
max-height: 300px;
}
.list-item {
align-items: flex-start;
flex-wrap: wrap;
}
.product-row,
.product-actions,
.pending-head,
.pending-item,
.pending-actions,
.admin-create,
.admin-user-item,
.created-admin-share {
display:grid;
grid-template-columns:1fr;
width:100%;
}
.product-edit {
grid-template-columns:1fr;
}
.matrix-wrap {
padding: 8px;
}
.matrix-scroll {
max-height: 62vh;
}
th, td {
font-size: 11px;
padding: 5px;
}
th:first-child,
td:first-child {
min-width: 170px;
}
input[type="text"],
select,
button {
min-height: 40px;
}
}
@media (max-width: 600px) {
.top-actions {
grid-template-columns: 1fr;
}
.inline-product {
grid-template-columns: 1fr;
}
.matrix-h-scroll {
height: 24px;
}
.matrix-scroll::-webkit-scrollbar,
.matrix-h-scroll::-webkit-scrollbar {
height: 18px;
width: 12px;
}
th:first-child,
td:first-child {
min-width: 145px;
}
.matrix-tip {
font-size: 11px;
}
}
+168 -1
View File
@@ -183,8 +183,163 @@
font-size: 16px;
}
.board {
/* ── Page layout: main content + news sidebar ── */
.page-layout {
margin-top: 18px;
display: grid;
grid-template-columns: 1fr 300px;
gap: 20px;
align-items: start;
}
.main-col {
min-width: 0;
}
.news-sidebar {
position: sticky;
top: 18px;
}
.news-widget {
background: #fff;
border-radius: var(--radius);
border: 1px solid #dfebff;
box-shadow: 0 10px 30px rgba(24, 56, 116, .08);
overflow: hidden;
}
.news-widget-head {
padding: 14px 16px 10px;
border-bottom: 1px solid #edf3ff;
display: flex;
align-items: center;
gap: 8px;
}
.news-all-link {
font-size: 12px; font-weight: 700; color: var(--brand-2);
text-decoration: none; white-space: nowrap;
padding: 3px 10px; border-radius: 999px;
background: #eef4ff; border: 1px solid #c8d8f7;
transition: .15s;
}
.news-all-link:hover { background: #dbe8ff; }
.news-widget-head h2 {
margin: 0;
font-size: 13px;
font-weight: 800;
text-transform: uppercase;
letter-spacing: 1px;
color: #234782;
display: flex;
align-items: center;
gap: 7px;
}
.news-widget-head h2::before {
content: "";
display: inline-block;
width: 3px;
height: 14px;
border-radius: 2px;
background: linear-gradient(180deg, #3978e0, #1f4ea3);
flex-shrink: 0;
}
.news-list {
display: flex;
flex-direction: column;
}
.news-card {
border-bottom: 1px solid #f0f5ff;
transition: background .15s;
overflow: hidden;
display: flex;
align-items: center;
text-decoration: none;
}
.news-card:last-child {
border-bottom: none;
}
.news-card:hover {
background: #f8fbff;
}
.news-card-img-wrap {
flex-shrink: 0;
width: 62px; height: 62px;
margin: 10px 0 10px 12px;
border-radius: 8px;
overflow: hidden;
background: #e8f0ff;
display: flex; align-items: center; justify-content: center;
}
.news-card-img {
width: 62px; height: 62px;
object-fit: cover;
display: block;
}
.news-card-no-img {
font-size: 22px;
line-height: 1;
}
.news-card-body {
padding: 10px 12px 10px 10px;
flex: 1;
min-width: 0;
display: flex;
flex-direction: column;
gap: 4px;
}
.news-card-date {
font-size: 10px;
font-weight: 700;
color: #b0c4df;
letter-spacing: .3px;
text-transform: uppercase;
}
.news-card-title {
margin: 0;
font-size: 12.5px;
font-weight: 700;
color: #1a3e79;
line-height: 1.4;
}
.news-card-btn {
display: inline-flex;
align-items: center;
font-size: 11px;
font-weight: 700;
color: var(--brand-2);
text-decoration: none;
margin-top: 2px;
transition: .15s;
}
.news-card-btn:hover {
color: var(--brand);
transform: translateX(2px);
}
.news-empty {
padding: 20px 16px;
font-size: 13px;
color: #9ab0d0;
margin: 0;
}
.board {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 16px;
@@ -538,12 +693,24 @@
transform: scale(1.04);
}
/* <= 1100px: keep the two-column page layout but fix the second column
   (presumably the news sidebar) at 260px. */
@media (max-width: 1100px) {
.page-layout { grid-template-columns: 1fr 260px; }
}
/* <= 980px: collapse to a single column - the board and the page layout both
   become one track, and the news sidebar returns to normal flow
   (position: static). */
@media (max-width: 980px) {
.brand-logo { max-width: 160px; }
.board { grid-template-columns: 1fr; }
.hero { padding: 20px; }
.credit { right: 8px; bottom: 6px; }
#btn-contact-ruslan { font-size: 14px; }
.page-layout { grid-template-columns: 1fr; }
.news-sidebar { position: static; }
/* NOTE(review): the base .news-list rule visible in this fragment already
   sets flex-direction: column, so this looks like a no-op unless a rule
   outside this fragment sets a row direction - confirm. */
.news-list { flex-direction: column; }
}
/* <= 640px.
   NOTE(review): both declarations below repeat values the visible base rules
   already produce (column direction; the same bottom border; no border-right
   is set anywhere in this fragment). They look like leftovers from an earlier
   horizontal card layout - confirm before removing. Also note that
   .news-card:last-child has higher specificity, so the last card still has
   no bottom border here. */
@media (max-width: 640px) {
.news-list { flex-direction: column; }
.news-card { border-right: none; border-bottom: 1px solid #f0f5ff; }
}
@media (max-width: 768px) {
Binary file not shown.

After

Width:  |  Height:  |  Size: 5.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.1 KiB

+25
View File
@@ -0,0 +1,25 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 64 64" role="img" aria-labelledby="title desc">
<!-- 64x64 favicon. The accessible name and description come from the
     <title>/<desc> elements referenced by aria-labelledby. -->
<title id="title">4MONT favicon</title>
<desc id="desc">A compact favicon inspired by the 4MONT logo: a blue geometric 4 and bold black M on a clean rounded square.</desc>
<defs>
<!-- Diagonal light-to-dark blue gradient in user-space coordinates, used as
     the fill of the "4" glyph. -->
<linearGradient id="blue" x1="5" y1="8" x2="38" y2="58" gradientUnits="userSpaceOnUse">
<stop offset="0" stop-color="#0C5CAD"/>
<stop offset="0.45" stop-color="#004C92"/>
<stop offset="1" stop-color="#002F62"/>
</linearGradient>
<!-- Faint drop shadow (2 units down, 16% opacity). The filter region is
     enlarged by 20% on every side so the blurred shadow is not clipped. -->
<filter id="softShadow" x="-20%" y="-20%" width="140%" height="140%">
<feDropShadow dx="0" dy="2" stdDeviation="2" flood-color="#001A33" flood-opacity="0.16"/>
</filter>
</defs>
<!-- Background: white rounded square spanning x/y 3..61. -->
<rect x="3" y="3" width="58" height="58" rx="14" fill="#FFFFFF"/>
<!-- Hairline border, inset by 0.5 so the default 1-unit stroke stays inside
     the white square's edge. -->
<rect x="3.5" y="3.5" width="57" height="57" rx="13.5" fill="none" stroke="#E6EAF0"/>
<!-- Both glyphs share the drop shadow. The M is drawn after the 4, so it
     paints on top where the two shapes overlap. -->
<g filter="url(#softShadow)">
<!-- Stylized 4 -->
<path fill="url(#blue)" d="M7 38.7 27.4 10.2h10.4v28.5h6.3v8.9h-6.3v7.3H27.9v-7.3H7v-8.9Zm20.9 0V25.2L18 38.7h9.9Z"/>
<!-- Compact M -->
<!-- NOTE(review): after the -5.4 translate this path spans roughly
     x = 33.8..63.7, so its right edge extends past the background square
     (which ends at x = 61) and nearly touches the viewBox edge at 64.
     Confirm this overhang is intentional. -->
<path fill="#050505" d="M39.2 54.9V10.2h9.4l5.7 16.1 5.7-16.1h9.1v44.7h-8.7V30.2l-4.7 13.3h-3.1l-4.8-13.3v24.7h-8.6Z" transform="translate(-5.4 0)"/>
</g>
</svg>

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.7 KiB

File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 254 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 76 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 91 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 96 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 120 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 103 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 61 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 76 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.6 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 189 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 200 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 40 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 40 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 952 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 103 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 107 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 254 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 120 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 90 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 72 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 107 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 508 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 40 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 120 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 61 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 78 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 68 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 27 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 170 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 76 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 157 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 103 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 107 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 186 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 74 KiB

Some files were not shown because too many files have changed in this diff Show More