feat: session bundle import/export to skip Playwright auto-login
Adds a lightweight way to pre-seed a Lingma workDir with an existing logged-in session:

- New module session_bundle.py packs/unpacks only the four cache files that make up a Lingma login (id, user, quota, config.json). Everything else (db, logs, index, diagnosis) stays local, so bundles stay tiny and never leak session-specific artefacts.
- Safety: path-traversal/symlink members are rejected; size is capped; export is refused from a workDir that isn't actually logged in; the sensitive cache/user file is chmod'd 0600 on restore.
- LingmaAccount gains optional session_bundle_b64 / session_bundle_file fields; the LINGMA_SESSION_BUNDLE[_FILE] env vars provide the singleton fallback. Credentials become optional when a bundle is supplied.
- LingmaPool.start() restores the bundle into each instance workDir only if it isn't already logged in, so persistent volumes aren't clobbered and a corrupt bundle falls back to Playwright gracefully.
- POST /internal/session/export returns the bundle as base64; ?instance= selects a specific pool instance. Requires an authed, already-logged-in instance to prevent exporting empties.
- README + .env.example document the end-to-end flow.

Made-with: Cursor
This commit is contained in:
@@ -9,6 +9,11 @@ from dataclasses import dataclass, field
|
||||
class LingmaAccount:
|
||||
username: str
|
||||
password: str
|
||||
# Optional: pre-captured Lingma session to skip Playwright auto-login.
|
||||
# Either inline base64 of a tar.gz bundle, or a path on disk holding the
|
||||
# same. Inline wins if both are set.
|
||||
session_bundle_b64: str = ""
|
||||
session_bundle_file: str = ""
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -72,8 +77,19 @@ def _parse_accounts(raw: str) -> list[LingmaAccount]:
|
||||
if isinstance(item, dict):
|
||||
u = str(item.get("username", "")).strip()
|
||||
p = str(item.get("password", "")).strip()
|
||||
if u and p:
|
||||
out.append(LingmaAccount(u, p))
|
||||
bundle = str(item.get("session_bundle", "")).strip()
|
||||
bundle_file = str(item.get("session_bundle_file", "")).strip()
|
||||
# Username/password become optional when a bundle is supplied:
|
||||
# Playwright login is only needed if there's no pre-captured session.
|
||||
if (u and p) or bundle or bundle_file:
|
||||
out.append(
|
||||
LingmaAccount(
|
||||
username=u,
|
||||
password=p,
|
||||
session_bundle_b64=bundle,
|
||||
session_bundle_file=bundle_file,
|
||||
)
|
||||
)
|
||||
return out
|
||||
|
||||
out: list[LingmaAccount] = []
|
||||
@@ -97,11 +113,29 @@ def load_settings() -> Settings:
|
||||
)
|
||||
|
||||
accounts = _parse_accounts(os.getenv("LINGMA_ACCOUNTS", ""))
|
||||
|
||||
# LINGMA_SESSION_BUNDLE / LINGMA_SESSION_BUNDLE_FILE are singleton envs
|
||||
# that attach a session to the first account (or implicitly create one
|
||||
# when neither LINGMA_ACCOUNTS nor LINGMA_USERNAME is provided -- common
|
||||
# "I just want to skip Playwright" case).
|
||||
fallback_bundle = os.getenv("LINGMA_SESSION_BUNDLE", "").strip()
|
||||
fallback_bundle_file = os.getenv("LINGMA_SESSION_BUNDLE_FILE", "").strip()
|
||||
|
||||
if not accounts:
|
||||
u = os.getenv("LINGMA_USERNAME", "").strip()
|
||||
p = os.getenv("LINGMA_PASSWORD", "").strip()
|
||||
if u and p:
|
||||
accounts.append(LingmaAccount(u, p))
|
||||
elif fallback_bundle or fallback_bundle_file:
|
||||
# Bundle-only login: no creds needed.
|
||||
accounts.append(LingmaAccount(username="", password=""))
|
||||
|
||||
if accounts and (fallback_bundle or fallback_bundle_file):
|
||||
# Only fill on account[0] if it doesn't already carry one (accounts
|
||||
# loaded from LINGMA_ACCOUNTS JSON may have per-entry bundles).
|
||||
if not accounts[0].session_bundle_b64 and not accounts[0].session_bundle_file:
|
||||
accounts[0].session_bundle_b64 = fallback_bundle
|
||||
accounts[0].session_bundle_file = fallback_bundle_file
|
||||
|
||||
explicit_count = os.getenv("LINGMA_INSTANCE_COUNT", "").strip()
|
||||
if explicit_count:
|
||||
|
||||
@@ -8,6 +8,12 @@ from .auto_login import AutoLoginManager
|
||||
from .config import LingmaAccount
|
||||
from .lingma_client import LingmaGatewayClient
|
||||
from .logging_config import get_logger
|
||||
from .session_bundle import (
|
||||
apply_bundle_to_workdir,
|
||||
decode_bundle,
|
||||
is_logged_in_workdir,
|
||||
resolve_bundle_b64,
|
||||
)
|
||||
|
||||
|
||||
logger = get_logger("lingma_gateway.pool")
|
||||
@@ -183,20 +189,67 @@ class LingmaPool:
|
||||
pool-mode we skip it anyway, but Lingma may still write there internally)
|
||||
and keeps docker logs readable. Failures are non-fatal; per-instance
|
||||
reconnect loops will take over.
|
||||
|
||||
Before spawning each Lingma process we optionally restore a pre-captured
|
||||
session bundle into the workDir, which lets us skip Playwright login
|
||||
entirely on a fresh volume.
|
||||
"""
|
||||
for inst in self._instances:
|
||||
self._maybe_apply_session_bundle(inst)
|
||||
logger.info(
|
||||
"pool starting %s (workDir=%s port=%d account=%s)",
|
||||
"pool starting %s (workDir=%s port=%d account=%s bundle=%s logged_in=%s)",
|
||||
inst.name,
|
||||
inst.cfg.work_dir,
|
||||
inst.cfg.socket_port,
|
||||
inst.cfg.account.username or "<empty>",
|
||||
bool(
|
||||
inst.cfg.account.session_bundle_b64
|
||||
or inst.cfg.account.session_bundle_file
|
||||
),
|
||||
is_logged_in_workdir(inst.cfg.work_dir),
|
||||
)
|
||||
try:
|
||||
await inst.client.start()
|
||||
except Exception as exc:
|
||||
logger.warning("pool start %s failed: %s", inst.name, exc)
|
||||
|
||||
@staticmethod
def _maybe_apply_session_bundle(inst: "PoolInstance") -> None:
    """Restore an exported Lingma session into ``inst.cfg.work_dir``, if needed.

    Nothing is done when:
      - the workDir already looks logged in (persistent volume case);
      - no bundle is configured on the instance's account.

    Any failure while decoding or applying the bundle is logged as a
    warning and otherwise ignored, so the caller can carry on and let the
    normal auto-login path take over.

    Args:
        inst: pool instance whose ``cfg.work_dir`` and ``cfg.account`` are
            consulted.
    """
    work_dir = inst.cfg.work_dir
    if is_logged_in_workdir(work_dir):
        return

    acc = inst.cfg.account
    b64 = resolve_bundle_b64(
        inline=acc.session_bundle_b64 or None,
        file_path=acc.session_bundle_file or None,
    )
    if not b64:
        return

    try:
        raw = decode_bundle(b64)
        restored = apply_bundle_to_workdir(work_dir, raw)
    except Exception as exc:
        logger.warning(
            "pool %s: failed to apply session bundle, will fall back to auto-login: %s",
            inst.name,
            exc,
        )
        return

    # A bundle can extract "successfully" yet restore nothing useful (every
    # member skipped, or no cache/user inside). Don't report that as applied.
    if not is_logged_in_workdir(work_dir):
        logger.warning(
            "pool %s: session bundle restored %d files (%s) but workDir is still "
            "not logged in, will fall back to auto-login",
            inst.name,
            len(restored),
            ",".join(restored),
        )
        return

    logger.info(
        "pool %s: applied session bundle (%d files: %s)",
        inst.name,
        len(restored),
        ",".join(restored),
    )
|
||||
|
||||
async def close(self) -> None:
|
||||
tasks = [asyncio.create_task(inst.client.close()) for inst in self._instances]
|
||||
for t in tasks:
|
||||
|
||||
60
app/main.py
60
app/main.py
@@ -24,6 +24,7 @@ from .openai_schema import (
|
||||
ModelsResponse,
|
||||
flatten_content,
|
||||
)
|
||||
from .session_bundle import encode_bundle, pack_workdir
|
||||
from .session_cache import SessionCache
|
||||
from .stats import StatsCollector, estimate_tokens
|
||||
|
||||
@@ -633,6 +634,65 @@ async def internal_auto_login_status():
|
||||
return {"ok": True, "instances": out}
|
||||
|
||||
|
||||
@app.post("/internal/session/export", dependencies=[Depends(auth_guard)])
async def internal_session_export(instance: str | None = None):
    """Export a logged-in Lingma session as a base64 tar.gz bundle.

    The returned `bundle_b64` can be dropped into `LINGMA_SESSION_BUNDLE`
    (or the `session_bundle` field in `LINGMA_ACCOUNTS` JSON) on any other
    deployment to skip Playwright login entirely.

    Args:
        instance: name of the pool instance to export from. When omitted
            (or empty) the pool's `pick()` chooses one.

    Returns:
        A dict with `instance`, `account`, `raw_bytes` and `bundle_b64`.

    Raises:
        HTTPException: 404 if `instance` names no pool instance; 503 if the
            auth status query fails; 409 if the instance reports no `id`
            (not logged in); 500 if packing the workDir fails.

    Safety:
      - The route is guarded by the `auth_guard` dependency.
      - Only works on instances that are currently authenticated (prevents
        exporting garbage from a half-initialised workDir).
      - Only the bundle sizes are logged, never the bundle itself; callers
        must store the response themselves.
    """
    p = _require_pool()
    if instance:
        target = next((inst for inst in p.instances if inst.name == instance), None)
        if target is None:
            raise HTTPException(status_code=404, detail={"error": f"instance {instance} not found"})
    else:
        target = p.pick()

    try:
        status = await target.client.auth_status()
    except Exception as exc:
        # Chain the cause so the original failure stays visible in tracebacks.
        raise HTTPException(
            status_code=503,
            detail={"error": f"instance {target.name} not ready: {exc}"},
        ) from exc
    if not (status and status.get("id")):
        raise HTTPException(
            status_code=409,
            detail={"error": f"instance {target.name} is not logged in"},
        )

    try:
        raw = pack_workdir(target.cfg.work_dir)
    except Exception as exc:
        raise HTTPException(status_code=500, detail={"error": str(exc)}) from exc

    bundle_b64 = encode_bundle(raw)
    logger.info(
        "session bundle exported from %s (%d bytes raw, %d bytes b64)",
        target.name,
        len(raw),
        len(bundle_b64),
    )
    return {
        "instance": target.name,
        "account": target.cfg.account.username or "",
        "raw_bytes": len(raw),
        "bundle_b64": bundle_b64,
    }
|
||||
|
||||
|
||||
@app.get("/internal/models/raw", dependencies=[Depends(auth_guard)])
|
||||
async def internal_models_raw(instance: str | None = None):
|
||||
"""Return the raw `config/queryModels` response from Lingma.
|
||||
|
||||
175
app/session_bundle.py
Normal file
175
app/session_bundle.py
Normal file
@@ -0,0 +1,175 @@
|
||||
from __future__ import annotations
|
||||
|
||||
"""
|
||||
Lingma session bundle: pack/unpack the minimal set of cache files that
|
||||
represent a logged-in state, so it can be injected as an env var to skip
|
||||
Playwright auto-login entirely.
|
||||
|
||||
Lingma stores its auth state in `<workDir>/cache/`. Experimentation shows the
|
||||
following files are necessary and sufficient to restore a session on a fresh
|
||||
workDir:
|
||||
|
||||
cache/id -> stable client UUID
|
||||
cache/user -> encrypted user/token blob
|
||||
cache/quota -> quota metadata referenced at startup
|
||||
cache/config.json -> endpoint / env config (mostly non-sensitive)
|
||||
|
||||
Volatile artifacts (db/, logs/, tmp/, index/, diagnosis.bin, .lock, .info)
|
||||
are *not* included: they are rebuilt by Lingma on first run.
|
||||
"""
|
||||
|
||||
import base64
|
||||
import io
|
||||
import os
|
||||
import tarfile
|
||||
from pathlib import Path
|
||||
|
||||
from .logging_config import get_logger
|
||||
|
||||
|
||||
logger = get_logger("lingma_gateway.bundle")
|
||||
|
||||
|
||||
BUNDLE_FILES: tuple[str, ...] = (
|
||||
"cache/id",
|
||||
"cache/user",
|
||||
"cache/quota",
|
||||
"cache/config.json",
|
||||
)
|
||||
|
||||
# Hard safety cap so a malformed bundle (or a /tmp full of junk) can't blow up
|
||||
# memory or disk. 4 MiB is ~1000x the real payload.
|
||||
MAX_BUNDLE_BYTES = 4 * 1024 * 1024
|
||||
|
||||
|
||||
def is_logged_in_workdir(work_dir: str | os.PathLike) -> bool:
    """Report whether `work_dir` appears to hold a logged-in session.

    Heuristic only: `<work_dir>/cache/user` must be a regular file with a
    non-zero size. An `OSError` raised while probing counts as "not logged in".
    """
    user_blob = Path(work_dir).joinpath("cache", "user")
    try:
        if not user_blob.is_file():
            return False
        return user_blob.stat().st_size > 0
    except OSError:
        return False
|
||||
|
||||
|
||||
def pack_workdir(work_dir: str | os.PathLike) -> bytes:
    """Create a tar.gz of the session-relevant subset of `work_dir/cache`.

    Only the paths listed in `BUNDLE_FILES` are considered. Missing files are
    silently skipped (e.g. `quota` isn't always present on fresh logins), but
    `cache/user` MUST exist and be non-empty -- exporting an empty bundle
    would just corrupt the remote side.

    Every member is archived as a plain regular file holding the file's
    contents, with ownership fields left at the `TarInfo` defaults. This keeps
    host user/group names out of the bundle and means a symlinked cache file
    is exported by content rather than as a symlink member, which the
    importer would reject.

    Args:
        work_dir: Lingma workDir to export from.

    Returns:
        The gzip-compressed tar archive as bytes.

    Raises:
        RuntimeError: if `cache/user` is missing or empty, or if the resulting
            archive exceeds `MAX_BUNDLE_BYTES`.
    """
    base = Path(work_dir)
    user_file = base / "cache" / "user"
    if not user_file.is_file() or user_file.stat().st_size == 0:
        raise RuntimeError(
            f"workDir {base} has no login state (cache/user missing or empty); "
            "cannot export a session bundle"
        )

    buf = io.BytesIO()
    with tarfile.open(fileobj=buf, mode="w:gz") as tf:
        for rel in BUNDLE_FILES:
            src = base / rel
            if not src.is_file():
                continue
            payload = src.read_bytes()
            st = src.stat()
            # Build the header by hand instead of tf.add(): uid/gid/uname/gname
            # stay at their neutral defaults and the type is always REGTYPE.
            info = tarfile.TarInfo(name=rel)
            info.size = len(payload)
            info.mode = st.st_mode & 0o777
            info.mtime = int(st.st_mtime)
            tf.addfile(info, io.BytesIO(payload))
    data = buf.getvalue()
    if len(data) > MAX_BUNDLE_BYTES:
        raise RuntimeError(
            f"session bundle too large: {len(data)} bytes (limit {MAX_BUNDLE_BYTES})"
        )
    return data
|
||||
|
||||
|
||||
def encode_bundle(raw: bytes) -> str:
    """Return `raw` encoded as standard base64, as an ASCII `str`."""
    encoded = base64.b64encode(raw)
    return str(encoded, "ascii")
|
||||
|
||||
|
||||
def decode_bundle(b64: str) -> bytes:
    """Decode a base64 session bundle into its raw tar.gz bytes.

    All whitespace (including embedded newlines) is removed before decoding,
    so line-wrapped base64 -- e.g. the output of the `base64` command saved to
    a file -- is accepted. Apart from that the input is validated strictly.

    Args:
        b64: base64 text of the bundle; `None` or blank is treated as empty.

    Returns:
        The decoded bytes.

    Raises:
        ValueError: if the input is empty, is not valid base64, or decodes to
            more than `MAX_BUNDLE_BYTES` bytes.
    """
    compact = "".join((b64 or "").split())
    if not compact:
        raise ValueError("empty bundle")
    try:
        raw = base64.b64decode(compact, validate=True)
    except (ValueError, TypeError) as exc:
        # binascii.Error (bad alphabet/padding) is a ValueError subclass.
        raise ValueError(f"invalid base64: {exc}") from exc
    if len(raw) > MAX_BUNDLE_BYTES:
        raise ValueError(f"bundle too large: {len(raw)} bytes")
    return raw
|
||||
|
||||
|
||||
def _is_safe_member(member: tarfile.TarInfo) -> bool:
    """Tell whether a tar member is one of our whitelisted plain files.

    A member passes only if its name is listed in `BUNDLE_FILES`, it is a
    regular file (not a directory, symlink or hard link), and its name is
    neither absolute nor contains a `..` component. This guards against path
    traversal (CVE-2007-4559 class) and symlink tricks.
    """
    name = member.name
    return (
        name in BUNDLE_FILES
        and not (member.isdir() or member.issym() or member.islnk())
        and member.isfile()
        # Explicit absolute-path / traversal check on top of the whitelist.
        and not name.startswith("/")
        and ".." not in Path(name).parts
    )
|
||||
|
||||
|
||||
def apply_bundle_to_workdir(work_dir: str | os.PathLike, raw: bytes) -> list[str]:
    """Extract the bundle into `<work_dir>/`, creating parents as needed.

    Members rejected by `_is_safe_member` are skipped with a warning. The
    accepted members are first read into memory, with their combined
    uncompressed size capped at `MAX_BUNDLE_BYTES`, and only then written to
    disk. A small compressed archive therefore cannot expand without bound,
    and a rejected bundle leaves the workDir files untouched.

    Files are written with mode 0600 for the sensitive `user` file and 0644
    for everything else; mode bits stored in the archive are ignored.

    Args:
        work_dir: destination Lingma workDir (created if missing).
        raw: gzip-compressed tar archive bytes.

    Returns:
        The member names that were written, in archive order.

    Raises:
        ValueError: if the accepted members expand beyond `MAX_BUNDLE_BYTES`.
        tarfile.TarError: if `raw` is not a readable tar.gz archive.
        OSError: if a destination file or directory cannot be written.
    """
    base = Path(work_dir)
    base.mkdir(parents=True, exist_ok=True)

    # Stage everything in memory first so size limits are enforced before
    # any file in the workDir is touched.
    staged: list[tuple[str, bytes]] = []
    expanded = 0
    with tarfile.open(fileobj=io.BytesIO(raw), mode="r:gz") as tf:
        for member in tf:
            if not _is_safe_member(member):
                logger.warning("bundle: skipping unsafe member %r", member.name)
                continue
            # member.size is the header-declared length that read() returns.
            expanded += member.size
            if expanded > MAX_BUNDLE_BYTES:
                raise ValueError(
                    f"bundle expands to more than {MAX_BUNDLE_BYTES} bytes; refusing to extract"
                )
            src = tf.extractfile(member)
            if src is None:
                continue
            staged.append((member.name, src.read()))

    restored: list[str] = []
    # O_BINARY only exists on Windows; without it os.open() is text-mode there.
    flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC | getattr(os, "O_BINARY", 0)
    for name, data in staged:
        dest = base / name
        dest.parent.mkdir(parents=True, exist_ok=True)
        mode = 0o600 if name.endswith("/user") else 0o644
        # Pass the mode at creation so a new file never has wider permissions
        # than intended, not even briefly.
        fd = os.open(dest, flags, mode)
        with os.fdopen(fd, "wb") as fh:
            try:
                # Also covers a pre-existing file and a restrictive umask.
                os.chmod(dest, mode)
            except OSError as exc:
                # Best effort: some filesystems don't support chmod.
                logger.warning("bundle: cannot set mode on %s: %s", dest, exc)
            fh.write(data)
        restored.append(name)

    return restored
|
||||
|
||||
|
||||
def resolve_bundle_b64(
    *,
    inline: str | None,
    file_path: str | None,
) -> str | None:
    """Choose the bundle text from an inline base64 string or a file on disk.

    The inline value takes precedence when it is non-blank. Otherwise the
    file (after `~` expansion) is read as strict ASCII. Returns the stripped
    text, or None when nothing is configured, the file is missing or
    unreadable, or its content is blank. Only the path and the error are
    logged, never the bundle material.
    """
    inline_text = inline.strip() if inline else ""
    if inline_text:
        return inline_text

    location = file_path.strip() if file_path else ""
    if not location:
        return None

    path = Path(location).expanduser()
    if not path.is_file():
        logger.warning("bundle: file %s not found, ignoring", path)
        return None

    try:
        content = path.read_text(encoding="ascii", errors="strict").strip()
    except Exception as exc:
        logger.warning("bundle: cannot read %s: %s", path, exc)
        return None
    return content if content else None
|
||||
Reference in New Issue
Block a user