diff --git a/docs/cli/me-wikipedia.md b/docs/cli/me-wikipedia.md
new file mode 100644
index 0000000..40d0a8c
--- /dev/null
+++ b/docs/cli/me-wikipedia.md
@@ -0,0 +1,70 @@
+# me wikipedia
+
+Download and import Wikimedia article dumps as Memory Engine memories.
+
+## Commands
+
+- [me wikipedia import](#me-wikipedia-import) -- download and import a Wikipedia XML dump
+
+---
+
+## me wikipedia import
+
+Download and import a Wikipedia dump. Wikimedia article dumps use the **MediaWiki XML export format**, usually distributed as a **bzip2-compressed** `.xml.bz2` archive such as `enwiki-latest-pages-articles-multistream.xml.bz2`.
+
+```
+me wikipedia import [source] [options]
+```
+
+| Argument | Required | Description |
+|----------|----------|-------------|
+| `source` | no | Wiki slug (`simplewiki`, `enwiki`), `http(s)` dump URL, or path to a local `.xml` / `.xml.bz2` file. When omitted, the `--wiki` value is used (default: `simplewiki`). |
+
+| Option | Description |
+|--------|-------------|
+| `--wiki <wiki>` | Wiki database name used when `source` is omitted, or when the wiki cannot be inferred from the name of a local file or dump URL (default: `simplewiki`). |
+| `--date <date>` | Dump date for Wikimedia URLs (default: `latest`). |
+| `--dump-kind <kind>` | Wikimedia dump kind (default: `pages-articles-multistream`). |
+| `--cache-dir <dir>` | Directory for downloaded dump archives. |
+| `--force-download` | Redownload even when the cache file exists. |
+| `--download-only` | Download the dump archive and exit. |
+| `--tree-root <path>` | Tree root for imported memories (default: `wikipedia`). |
+| `--namespace <n>` | MediaWiki namespace number to import (default: `0`, articles). |
+| `--include-redirects` | Import redirect pages. Redirects are skipped by default. |
+| `--content-mode <mode>` | Content to store: `plain` or `wikitext` (default: `plain`). |
+| `--max-content-bytes <n>` | Truncate each memory content to this many UTF-8 bytes (`0` disables truncation). |
+| `--limit <n>` | Maximum article memories to process after filters. Useful for samples. |
+| `--batch-size <n>` | Memories to buffer before each `memory.batchCreate` (default: `500`). |
+| `--dry-run` | Parse and estimate without writing memories. |
+| `--update-existing` | Update existing deterministic Wikipedia memories instead of skipping them. |
+| `-v, --verbose` | Show per-batch progress output. |
+
+### Examples
+
+```bash
+# Cheap validation run against Simple English Wikipedia
+me wikipedia import --dry-run --limit 1000
+
+# Import Simple English Wikipedia
+me wikipedia import simplewiki
+
+# Import full English Wikipedia
+me wikipedia import enwiki
+
+# Use an already-downloaded archive
+me wikipedia import ~/Downloads/enwiki-latest-pages-articles-multistream.xml.bz2 --wiki enwiki
+
+# Download only
+me wikipedia import enwiki --download-only
+```
+
+### Memory shape
+
+Each imported article becomes one memory:
+
+- `content`: `# Title` followed by either cleaned plain text or raw wikitext.
+- `tree`: `<tree-root>.<primary_category_slug>`, where `primary_category_slug` is the first category in `meta.categories` normalized for ltree, for example `wikipedia.relational_databases`. Articles without categories use `wikipedia.uncategorized`.
+- `temporal`: current revision timestamp from the dump.
+- `meta`: source metadata including `source_wiki`, `source_page_id`, `source_revision_id`, `source_title`, `source_url`, `categories`, `primary_category`, `primary_category_slug`, `source_format`, `content_format`, and importer version.
+
+IDs are deterministic per `(wiki, page_id)`, so re-running the same import skips already-created articles instead of duplicating them. Pass `--update-existing` to update those existing memories instead of skipping them.
diff --git a/packages/cli/commands/wikipedia.ts b/packages/cli/commands/wikipedia.ts
new file mode 100644
index 0000000..770f25a
--- /dev/null
+++ b/packages/cli/commands/wikipedia.ts
@@ -0,0 +1,727 @@
+/**
+ * me wikipedia — import Wikimedia article dumps as memories.
+ */
+
+import { existsSync } from "node:fs";
+import { homedir } from "node:os";
+import { basename, join, resolve } from "node:path";
+import * as clack from "@clack/prompts";
+import type { MemoryCreateParams } from "@memory.build/protocol/engine";
+import { Command } from "commander";
+import { batchCreateChunked } from "../chunk.ts";
+import { createClient, type EngineClient } from "../client.ts";
+import { resolveCredentials } from "../credentials.ts";
+import { getOutputFormat, type OutputFormat, output } from "../output.ts";
+import { handleError, requireEngine, requireSession } from "../util.ts";
+import {
+  buildWikipediaDumpUrl,
+  buildWikipediaMemory,
+  DEFAULT_WIKIPEDIA_DUMP_DATE,
+  DEFAULT_WIKIPEDIA_DUMP_KIND,
+  DEFAULT_WIKIPEDIA_WIKI,
+  downloadFile,
+  inferDumpDateFromDumpName,
+  inferDumpKindFromDumpName,
+  inferWikiSlugFromDumpName,
+  openDumpTextStream,
+  streamMediaWikiPages,
+  WIKIPEDIA_DUMP_FORMAT,
+  type WikipediaContentMode,
+} from "../wikipedia.ts";
+
+/** Default value of `--tree-root`. */
+const DEFAULT_TREE_ROOT = "wikipedia";
+/** Default value of `--batch-size`: memories buffered before each flush. */
+const DEFAULT_BATCH_SIZE = 500;
+/**
+ * Price used only for the cost estimate printed in the import summary.
+ * NOTE(review): hard-coded; confirm against current OpenAI pricing.
+ */
+const OPENAI_TEXT_EMBEDDING_3_SMALL_USD_PER_MILLION_TOKENS = 0.02;
+/** Accepted `--tree-root` shape: dot-separated labels of `[a-z0-9_]`. */
+const VALID_TREE_ROOT_RE = /^[a-z0-9_]+(\.[a-z0-9_]+)*$/;
+/** Accepted wiki database names: `[a-z0-9_]+` followed by `wiki`, case-insensitive. */
+const VALID_WIKI_SLUG_RE = /^[a-z0-9_]+wiki$/i;
+
+/** Outcome of turning the `source` argument into a dump file on disk. */
+interface ResolvedWikipediaSource {
+  /** Lower-cased wiki database name, validated by `normalizeWikiSlug`. */
+  wikiSlug: string;
+  /** Dump date inferred from the file name when possible, otherwise the `--date` value. */
+  dumpDate?: string;
+  /** Dump kind inferred from the file name when possible, otherwise the `--dump-kind` value. */
+  dumpKind?: string;
+  /** URL the dump comes from; left unset when `source` is a local file. */
+  sourceUrl?: string;
+  /** Path of the dump file that will be read (a local file or a file inside the cache dir). */
+  sourcePath: string;
+  /** `downloaded` flag reported by `downloadFile`; always `false` for local files. */
+  downloaded: boolean;
+  /** Byte count reported by `downloadFile`; unset for local files. */
+  bytesDownloaded?: number;
+  /** Total size reported by `downloadFile`; unset for local files. */
+  totalBytes?: number;
+}
+
+/**
+ * Result of one import run. It is both the structured output of the command
+ * and the input of `renderWikipediaImportResult`.
+ */
+interface WikipediaImportStats {
+  /** True when the run only parsed and estimated, without writing memories. */
+  dryRun: boolean;
+  /** Copied from `WIKIPEDIA_DUMP_FORMAT`. */
+  dumpFormat: string;
+  // The next five fields are copied from the resolved source.
+  sourcePath: string;
+  sourceUrl?: string;
+  wikiSlug: string;
+  dumpDate?: string;
+  dumpKind?: string;
+  // The next four fields echo the effective import options.
+  treeRoot: string;
+  namespace: number;
+  includeRedirects: boolean;
+  contentMode: WikipediaContentMode;
+  /** Every page yielded by the dump stream, before any filtering. */
+  pagesScanned: number;
+  /** Pages whose namespace differs from the requested one. */
+  namespaceSkipped: number;
+  /** Redirect pages dropped because `--include-redirects` was not set. */
+  redirectsSkipped: number;
+  /** Pages for which `buildWikipediaMemory` returned nothing. */
+  emptyContentSkipped: number;
+  /** Pages that produced a memory payload. */
+  memoriesPrepared: number;
+  /** Prepared memories that `buildWikipediaMemory` flagged as truncated. */
+  contentTruncated: number;
+  /** Memories reported as inserted by `batchCreateChunked`. */
+  imported: number;
+  /** Memories updated through `memory.update` (only with `--update-existing`). */
+  updated: number;
+  /** Memories neither inserted nor failed, counted when `--update-existing` is off. */
+  skipped: number;
+  /** Items in failed create chunks plus failed updates. */
+  failed: number;
+  /** Sum of `contentBytes` over all prepared memories. */
+  estimatedContentBytes: number;
+  /** Token estimate derived from `estimatedContentBytes`. */
+  estimatedEmbeddingTokens: number;
+  /** Cost estimate derived from `estimatedEmbeddingTokens`. */
+  estimatedEmbeddingCostUsd: number;
+  /** One entry per failed create chunk or failed update. */
+  errors: Array<{ source: string; error: string; itemCount?: number }>;
+}
+
+/** Build the `me wikipedia` command group and register its `import` subcommand. */
+export function createWikipediaCommand(): Command {
+  const group = new Command("wikipedia");
+  group.description("import Wikipedia dumps");
+  group.addCommand(createWikipediaImportCommand());
+  return group;
+}
+
+/**
+ * Build the `me wikipedia import` subcommand.
+ *
+ * The action resolves (and, if needed, downloads) the dump, then either
+ * reports the download (`--download-only`) or streams the dump into memories
+ * via `runWikipediaImport`.
+ *
+ * Import options are validated BEFORE the dump is resolved, so an invalid
+ * `--batch-size`, `--namespace`, etc. fails immediately instead of after a
+ * potentially very long download. They are not validated for
+ * `--download-only`, which does not use them.
+ *
+ * Exit codes set by the action itself (non-dry-run only):
+ * - 2 when some items failed and nothing was imported or updated;
+ * - 1 when some items failed but at least one memory was written.
+ * Any thrown error is passed to `handleError`.
+ */
+function createWikipediaImportCommand(): Command {
+  return new Command("import")
+    .description("download and import a Wikipedia XML dump as memories")
+    .argument(
+      "[source]",
+      "wiki slug (simplewiki/enwiki), dump URL, or local .xml/.xml.bz2 file",
+    )
+    .option(
+      "--wiki <wiki>",
+      `wiki database name when source is omitted or a local file (default: ${DEFAULT_WIKIPEDIA_WIKI})`,
+      DEFAULT_WIKIPEDIA_WIKI,
+    )
+    .option(
+      "--date <date>",
+      `dump date for Wikimedia URLs (default: ${DEFAULT_WIKIPEDIA_DUMP_DATE})`,
+      DEFAULT_WIKIPEDIA_DUMP_DATE,
+    )
+    .option(
+      "--dump-kind <kind>",
+      `Wikimedia dump kind (default: ${DEFAULT_WIKIPEDIA_DUMP_KIND})`,
+      DEFAULT_WIKIPEDIA_DUMP_KIND,
+    )
+    .option(
+      "--cache-dir <dir>",
+      "directory for downloaded dump archives",
+      defaultWikipediaCacheDir(),
+    )
+    .option("--force-download", "redownload even when the cache file exists")
+    .option("--download-only", "download the dump archive and exit")
+    .option(
+      "--tree-root <path>",
+      `tree root for imported memories (default: ${DEFAULT_TREE_ROOT})`,
+      DEFAULT_TREE_ROOT,
+    )
+    .option(
+      "--namespace <n>",
+      "MediaWiki namespace number to import (0 = articles)",
+      "0",
+    )
+    .option("--include-redirects", "import redirect pages (default: skip)")
+    .option(
+      "--content-mode <mode>",
+      "article content to store: plain or wikitext",
+      "plain",
+    )
+    .option(
+      "--max-content-bytes <n>",
+      "truncate each memory content to this many UTF-8 bytes (0 disables truncation)",
+    )
+    .option("--limit <n>", "maximum article memories to process after filters")
+    .option(
+      "--batch-size <n>",
+      `memories to buffer before each batchCreate (default: ${DEFAULT_BATCH_SIZE})`,
+      String(DEFAULT_BATCH_SIZE),
+    )
+    .option("--dry-run", "parse and estimate without writing memories")
+    .option(
+      "--update-existing",
+      "update existing deterministic Wikipedia memories instead of skipping them",
+    )
+    .option("-v, --verbose", "show per-batch progress output")
+    .action(async (source: string | undefined, opts, cmd) => {
+      const globalOpts = cmd.optsWithGlobals();
+      const fmt = getOutputFormat(globalOpts);
+      // Dry runs and download-only runs never write memories, so they do not
+      // need credentials or an engine client.
+      const requiresEngine = opts.dryRun !== true && opts.downloadOnly !== true;
+
+      let engine: EngineClient | undefined;
+      if (requiresEngine) {
+        const creds = resolveCredentials(globalOpts.server);
+        requireSession(creds, fmt);
+        requireEngine(creds, fmt);
+        engine = createClient({ url: creds.server, apiKey: creds.apiKey });
+      }
+
+      try {
+        // Fail fast on bad flags: resolving the source may download a dump
+        // that is many gigabytes large.
+        const parsedOptions = opts.downloadOnly
+          ? undefined
+          : parseWikipediaImportOptions(opts);
+
+        const resolvedSource = await resolveWikipediaSource(source, opts, fmt);
+
+        if (parsedOptions === undefined) {
+          // --download-only: report where the archive lives and stop.
+          await output(
+            {
+              downloaded: resolvedSource.downloaded,
+              path: resolvedSource.sourcePath,
+              url: resolvedSource.sourceUrl,
+              wikiSlug: resolvedSource.wikiSlug,
+              dumpDate: resolvedSource.dumpDate,
+              dumpKind: resolvedSource.dumpKind,
+              dumpFormat: WIKIPEDIA_DUMP_FORMAT,
+              bytesDownloaded: resolvedSource.bytesDownloaded,
+              totalBytes: resolvedSource.totalBytes,
+            },
+            fmt,
+            () => {
+              const verb = resolvedSource.downloaded
+                ? "Downloaded"
+                : "Using cached";
+              clack.log.success(`${verb} ${resolvedSource.sourcePath}`);
+              console.log(`  Format: ${WIKIPEDIA_DUMP_FORMAT}`);
+              if (resolvedSource.sourceUrl) {
+                console.log(`  URL: ${resolvedSource.sourceUrl}`);
+              }
+            },
+          );
+          return;
+        }
+
+        const result = await runWikipediaImport({
+          engine,
+          resolvedSource,
+          fmt,
+          dryRun: opts.dryRun === true,
+          verbose: opts.verbose === true,
+          treeRoot: parsedOptions.treeRoot,
+          namespace: parsedOptions.namespace,
+          includeRedirects: opts.includeRedirects === true,
+          contentMode: parsedOptions.contentMode,
+          maxContentBytes: parsedOptions.maxContentBytes,
+          limit: parsedOptions.limit,
+          batchSize: parsedOptions.batchSize,
+          updateExisting: opts.updateExisting === true,
+        });
+
+        await output(result, fmt, () => renderWikipediaImportResult(result));
+
+        if (result.failed > 0 && !result.dryRun) {
+          // Updates count as written memories: a run that updated articles
+          // but inserted none is a partial failure, not a total one.
+          const wroteNothing = result.imported === 0 && result.updated === 0;
+          process.exit(wroteNothing ? 2 : 1);
+        }
+      } catch (error) {
+        handleError(error, fmt);
+      }
+    });
+}
+
+/** Validated, typed form of the import-related CLI options. */
+interface ParsedWikipediaImportOptions {
+  /** `--tree-root`, checked against `VALID_TREE_ROOT_RE`. */
+  treeRoot: string;
+  /** `--namespace` as a non-negative integer. */
+  namespace: number;
+  /** `--content-mode`, restricted to `plain` or `wikitext`. */
+  contentMode: WikipediaContentMode;
+  /** `--max-content-bytes`; `undefined` when the flag is absent or `0` (no truncation). */
+  maxContentBytes?: number;
+  /** `--limit` as a positive integer; `undefined` when the flag is absent. */
+  limit?: number;
+  /** `--batch-size` as a positive integer. */
+  batchSize: number;
+}
+
+/**
+ * Validate the raw commander option bag and convert it to typed values.
+ *
+ * Options are checked in a fixed order (tree root, namespace, content mode,
+ * max content bytes, limit, batch size); the first invalid one throws.
+ *
+ * @param opts - raw option values as produced by commander
+ * @returns the validated options; `maxContentBytes` of `0` is reported as `undefined`
+ * @throws Error when any option is malformed
+ */
+function parseWikipediaImportOptions(
+  opts: Record<string, unknown>,
+): ParsedWikipediaImportOptions {
+  const treeRoot = String(opts.treeRoot ?? DEFAULT_TREE_ROOT);
+  const treeRootIsValid = VALID_TREE_ROOT_RE.test(treeRoot);
+  if (!treeRootIsValid) {
+    throw new Error(
+      `Invalid --tree-root: '${treeRoot}'. Must match [a-z0-9_]+(\\.[a-z0-9_]+)*`,
+    );
+  }
+
+  const namespace = parseNonNegativeInteger("--namespace", opts.namespace);
+
+  const contentMode = String(opts.contentMode ?? "plain");
+  if (contentMode !== "plain" && contentMode !== "wikitext") {
+    throw new Error("Invalid --content-mode: must be plain or wikitext");
+  }
+
+  // 0 means "do not truncate", which downstream code expresses as undefined.
+  let maxContentBytes: number | undefined;
+  if (opts.maxContentBytes !== undefined) {
+    const requestedBytes = parseNonNegativeInteger(
+      "--max-content-bytes",
+      opts.maxContentBytes,
+    );
+    if (requestedBytes !== 0) maxContentBytes = requestedBytes;
+  }
+
+  let limit: number | undefined;
+  if (opts.limit !== undefined) {
+    limit = parsePositiveInteger("--limit", opts.limit);
+  }
+
+  const batchSize = parsePositiveInteger("--batch-size", opts.batchSize);
+
+  return { treeRoot, namespace, contentMode, maxContentBytes, limit, batchSize };
+}
+
+/**
+ * Turn the `source` argument into a dump file on disk, downloading it into
+ * the cache directory when it is remote.
+ *
+ * Resolution order:
+ * 1. `source` is an `http(s)` URL: download it to `<cache-dir>/<file name>`.
+ * 2. `source` names an existing local path: use it as is.
+ * 3. `source` is a wiki slug, or is omitted (then `--wiki` is used): build
+ *    the Wikimedia dump URL from slug, `--date` and `--dump-kind`, and
+ *    download it.
+ *
+ * For cases 1 and 2 the wiki slug, dump date and dump kind are inferred from
+ * the file name when possible, falling back to the corresponding options.
+ *
+ * @param source - positional CLI argument, if any
+ * @param opts - raw commander option values
+ * @param fmt - output format, forwarded to the download progress renderer
+ * @throws Error when `source` is not a URL, an existing file, or a wiki slug;
+ *   when a URL has no file name component; or when the wiki slug is invalid
+ */
+async function resolveWikipediaSource(
+  source: string | undefined,
+  opts: Record<string, unknown>,
+  fmt: OutputFormat,
+): Promise<ResolvedWikipediaSource> {
+  const cacheDir = resolve(expandHome(String(opts.cacheDir)));
+  const requestedDumpKind = String(
+    opts.dumpKind ?? DEFAULT_WIKIPEDIA_DUMP_KIND,
+  );
+  const fallbackWiki = String(opts.wiki ?? DEFAULT_WIKIPEDIA_WIKI);
+  const fallbackDate = String(opts.date ?? DEFAULT_WIKIPEDIA_DUMP_DATE);
+  const force = opts.forceDownload === true;
+
+  if (source && isUrl(source)) {
+    const url = source;
+    const fileName = basename(new URL(url).pathname);
+    if (!fileName) {
+      // Without a file name the destination would be the cache directory
+      // itself, which cannot hold the downloaded archive.
+      throw new Error(
+        `Dump URL '${url}' does not include a file name to download.`,
+      );
+    }
+    const sourcePath = join(cacheDir, fileName);
+    const wikiSlug = normalizeWikiSlug(
+      inferWikiSlugFromDumpName(fileName) ?? fallbackWiki,
+    );
+    const dumpDate = inferDumpDateFromDumpName(fileName) ?? fallbackDate;
+    const dumpKind = inferDumpKindFromDumpName(fileName) ?? requestedDumpKind;
+    const downloaded = await downloadWikipediaSource(
+      url,
+      sourcePath,
+      force,
+      fmt,
+    );
+    return {
+      wikiSlug,
+      dumpDate,
+      dumpKind,
+      sourceUrl: url,
+      sourcePath,
+      downloaded: downloaded.downloaded,
+      bytesDownloaded: downloaded.bytesDownloaded,
+      totalBytes: downloaded.totalBytes,
+    };
+  }
+
+  // Resolve the candidate local path once and reuse it.
+  const localPath = source ? resolve(expandHome(source)) : undefined;
+  if (localPath !== undefined && existsSync(localPath)) {
+    const fileName = basename(localPath);
+    return {
+      wikiSlug: normalizeWikiSlug(
+        inferWikiSlugFromDumpName(fileName) ?? fallbackWiki,
+      ),
+      dumpDate: inferDumpDateFromDumpName(fileName) ?? fallbackDate,
+      dumpKind: inferDumpKindFromDumpName(fileName) ?? requestedDumpKind,
+      sourcePath: localPath,
+      downloaded: false,
+    };
+  }
+
+  const sourceLooksLikeWikiSlug = source && VALID_WIKI_SLUG_RE.test(source);
+  if (source && !sourceLooksLikeWikiSlug) {
+    throw new Error(
+      `Source '${source}' is not a URL, an existing file, or a wiki slug like enwiki/simplewiki.`,
+    );
+  }
+
+  // A slug given positionally wins over --wiki.
+  const wikiSlug = normalizeWikiSlug(
+    String(source ?? opts.wiki ?? DEFAULT_WIKIPEDIA_WIKI),
+  );
+
+  const dumpDate = fallbackDate;
+  const url = buildWikipediaDumpUrl(wikiSlug, dumpDate, requestedDumpKind);
+  const fileName = basename(new URL(url).pathname);
+  const sourcePath = join(cacheDir, fileName);
+  const downloaded = await downloadWikipediaSource(url, sourcePath, force, fmt);
+
+  return {
+    wikiSlug,
+    dumpDate,
+    dumpKind: requestedDumpKind,
+    sourceUrl: url,
+    sourcePath,
+    downloaded: downloaded.downloaded,
+    bytesDownloaded: downloaded.bytesDownloaded,
+    totalBytes: downloaded.totalBytes,
+  };
+}
+
+/**
+ * Download `url` to `destinationPath` through `downloadFile`, drawing a
+ * single-line progress indicator on stderr.
+ *
+ * Progress is shown only for text output on a TTY and is refreshed at most
+ * once per second. If anything was drawn, the line is terminated with a
+ * newline once the download settles, whether it succeeded or failed.
+ *
+ * @returns whatever `downloadFile` resolves to
+ */
+async function downloadWikipediaSource(
+  url: string,
+  destinationPath: string,
+  force: boolean,
+  fmt: OutputFormat,
+) {
+  let lastProgressAt = 0;
+  let wroteProgress = false;
+
+  try {
+    return await downloadFile(url, destinationPath, {
+      force,
+      onProgress: ({ bytesDownloaded, totalBytes }) => {
+        const interactive = fmt === "text" && process.stderr.isTTY;
+        if (!interactive) return;
+
+        const now = Date.now();
+        const throttled = now - lastProgressAt < 1000;
+        if (throttled) return;
+
+        lastProgressAt = now;
+        wroteProgress = true;
+        const total = totalBytes ? ` / ${formatBytes(totalBytes)}` : "";
+        const line = `\rDownloading ${formatBytes(bytesDownloaded)}${total}...`;
+        process.stderr.write(line);
+      },
+    });
+  } finally {
+    if (wroteProgress) process.stderr.write("\n");
+  }
+}
+
+/** Everything `runWikipediaImport` needs for one pass over a dump. */
+interface RunWikipediaImportOptions {
+  /** Engine client; required unless `dryRun` is true. */
+  engine?: EngineClient;
+  /** Dump file to read and its wiki/date/kind metadata. */
+  resolvedSource: ResolvedWikipediaSource;
+  /** Output format; progress and verbose lines are only written for `"text"`. */
+  fmt: OutputFormat;
+  /** When true, batches are counted but never sent to the engine. */
+  dryRun: boolean;
+  /** When true, one line per flushed batch is written to stderr. */
+  verbose: boolean;
+  /** Tree root passed to `buildWikipediaMemory`. */
+  treeRoot: string;
+  /** Only pages in this MediaWiki namespace are imported. */
+  namespace: number;
+  /** When false, redirect pages are skipped. */
+  includeRedirects: boolean;
+  /** Content mode passed to `buildWikipediaMemory`. */
+  contentMode: WikipediaContentMode;
+  /** Byte cap passed to `buildWikipediaMemory`; `undefined` means no cap. */
+  maxContentBytes?: number;
+  /** Stop after this many memories have been prepared. */
+  limit?: number;
+  /** Number of prepared memories buffered before a flush. */
+  batchSize: number;
+  /** When true, memories neither inserted nor failed are updated instead of skipped. */
+  updateExisting: boolean;
+}
+
+/**
+ * Stream a dump file, convert matching pages to memories and write them to
+ * the engine in batches (or only count them when `dryRun` is set).
+ *
+ * Per page: pages outside the requested namespace are skipped, then
+ * redirects (unless `includeRedirects`), then pages for which
+ * `buildWikipediaMemory` returns nothing. Everything else is buffered and
+ * flushed every `batchSize` memories. Scanning stops once `limit` memories
+ * have been prepared.
+ *
+ * Dump lifecycle: if the scan did not reach the end of the dump, either
+ * because `limit` was hit or because an error was thrown, the dump is closed
+ * explicitly and errors from its completion promise are ignored, so the
+ * original error (if any) is the one that propagates. Only after a full scan
+ * is the completion promise awaited as is.
+ *
+ * @returns counters and estimates describing the run
+ * @throws Error when a non-dry run has no engine client, plus anything thrown
+ *   by the dump stream or by `batchCreateChunked`
+ */
+async function runWikipediaImport(
+  options: RunWikipediaImportOptions,
+): Promise<WikipediaImportStats> {
+  const importedAt = new Date().toISOString();
+  const stats: WikipediaImportStats = {
+    dryRun: options.dryRun,
+    dumpFormat: WIKIPEDIA_DUMP_FORMAT,
+    sourcePath: options.resolvedSource.sourcePath,
+    sourceUrl: options.resolvedSource.sourceUrl,
+    wikiSlug: options.resolvedSource.wikiSlug,
+    dumpDate: options.resolvedSource.dumpDate,
+    dumpKind: options.resolvedSource.dumpKind,
+    treeRoot: options.treeRoot,
+    namespace: options.namespace,
+    includeRedirects: options.includeRedirects,
+    contentMode: options.contentMode,
+    pagesScanned: 0,
+    namespaceSkipped: 0,
+    redirectsSkipped: 0,
+    emptyContentSkipped: 0,
+    memoriesPrepared: 0,
+    contentTruncated: 0,
+    imported: 0,
+    updated: 0,
+    skipped: 0,
+    failed: 0,
+    estimatedContentBytes: 0,
+    estimatedEmbeddingTokens: 0,
+    estimatedEmbeddingCostUsd: 0,
+    errors: [],
+  };
+
+  const pending: MemoryCreateParams[] = [];
+  let batchNumber = 0;
+  let stoppedEarly = false;
+  // True only once the dump stream has been read to its end.
+  let scanCompleted = false;
+  let lastProgressAt = 0;
+  // Tracks whether a progress line is on screen and needs a closing newline.
+  let wroteProgress = false;
+  const openedDump = openDumpTextStream(options.resolvedSource.sourcePath);
+
+  const renderProgress = () => {
+    maybeRenderProgress(options, stats, lastProgressAt, (next) => {
+      lastProgressAt = next;
+      wroteProgress = true;
+    });
+  };
+
+  // Send everything buffered in `pending` to the engine and fold the outcome
+  // into `stats`. In dry-run mode the batch is only counted.
+  const flushPending = async () => {
+    if (pending.length === 0) return;
+    const batch = pending.splice(0, pending.length);
+    batchNumber++;
+
+    if (options.dryRun) {
+      if (options.verbose && options.fmt === "text") {
+        console.error(
+          `Validated batch ${batchNumber} (${batch.length} memories)`,
+        );
+      }
+      return;
+    }
+
+    if (!options.engine)
+      throw new Error("Engine client is required for import");
+    const explicitIds = batch
+      .map((memory) => memory.id)
+      .filter((id): id is string => typeof id === "string");
+    const { insertedIds, failedIds, errors } = await batchCreateChunked(
+      options.engine,
+      batch,
+    );
+    stats.imported += insertedIds.length;
+    const insertedSet = new Set(insertedIds);
+    const failedSet = new Set(failedIds);
+    // Ids that were neither inserted nor failed. The summary labels these
+    // "Already existed"; presumably the engine skipped them as duplicates.
+    const skippedIds = explicitIds.filter(
+      (id) => !insertedSet.has(id) && !failedSet.has(id),
+    );
+
+    if (options.updateExisting) {
+      const payloadsById = new Map(
+        batch
+          .filter(
+            (memory): memory is MemoryCreateParams & { id: string } =>
+              typeof memory.id === "string",
+          )
+          .map((memory) => [memory.id, memory]),
+      );
+      for (const skippedId of skippedIds) {
+        const payload = payloadsById.get(skippedId);
+        if (!payload) continue;
+        try {
+          await options.engine.memory.update({
+            id: skippedId,
+            content: payload.content,
+            meta: payload.meta,
+            tree: payload.tree,
+            temporal: payload.temporal,
+          });
+          stats.updated++;
+        } catch (error) {
+          // A failed update is recorded and the run continues.
+          stats.failed++;
+          stats.errors.push({
+            source: `batch ${batchNumber}, update ${skippedId}`,
+            error: error instanceof Error ? error.message : String(error),
+            itemCount: 1,
+          });
+        }
+      }
+    } else {
+      stats.skipped += skippedIds.length;
+    }
+
+    for (const error of errors) {
+      stats.failed += error.itemCount;
+      stats.errors.push({
+        source: `batch ${batchNumber}, chunk ${error.chunkIndex}`,
+        error: error.error,
+        itemCount: error.itemCount,
+      });
+    }
+
+    if (options.verbose && options.fmt === "text") {
+      console.error(
+        `Imported batch ${batchNumber}: +${insertedIds.length}, updated=${stats.updated}, skipped=${stats.skipped}, failed=${stats.failed}`,
+      );
+    }
+  };
+
+  try {
+    for await (const page of streamMediaWikiPages(openedDump.stream)) {
+      stats.pagesScanned++;
+
+      if (page.namespace !== options.namespace) {
+        stats.namespaceSkipped++;
+        renderProgress();
+        continue;
+      }
+
+      // A page is a redirect if the dump marks it as one or its text starts
+      // with the #REDIRECT directive.
+      const redirect =
+        page.redirectTitle !== undefined ||
+        /^#REDIRECT\b/i.test(page.text.trim());
+      if (redirect && !options.includeRedirects) {
+        stats.redirectsSkipped++;
+        renderProgress();
+        continue;
+      }
+
+      const built = buildWikipediaMemory(page, {
+        wikiSlug: options.resolvedSource.wikiSlug,
+        treeRoot: options.treeRoot,
+        contentMode: options.contentMode,
+        sourceDumpPath: options.resolvedSource.sourcePath,
+        sourceDumpUrl: options.resolvedSource.sourceUrl,
+        sourceDumpDate: options.resolvedSource.dumpDate,
+        sourceDumpKind: options.resolvedSource.dumpKind,
+        importedAt,
+        maxContentBytes: options.maxContentBytes,
+      });
+
+      if (!built) {
+        stats.emptyContentSkipped++;
+        renderProgress();
+        continue;
+      }
+
+      stats.memoriesPrepared++;
+      stats.estimatedContentBytes += built.contentBytes;
+      if (built.truncated) stats.contentTruncated++;
+      pending.push(built.memory);
+
+      if (pending.length >= options.batchSize) {
+        await flushPending();
+      }
+
+      renderProgress();
+
+      if (
+        options.limit !== undefined &&
+        stats.memoriesPrepared >= options.limit
+      ) {
+        stoppedEarly = true;
+        break;
+      }
+    }
+
+    scanCompleted = !stoppedEarly;
+    await flushPending();
+  } finally {
+    if (scanCompleted) {
+      await openedDump.completion;
+    } else {
+      // The limit was reached or an error was thrown mid-scan: stop the
+      // reader and ignore errors from the aborted stream, so this block can
+      // neither wait on an unfinished stream nor replace the original error.
+      openedDump.close();
+      await openedDump.completion.catch(() => {});
+    }
+    // Terminate the in-place progress line, if one was drawn.
+    if (wroteProgress) process.stderr.write("\n");
+  }
+
+  stats.estimatedEmbeddingTokens = estimateEmbeddingTokens(
+    stats.estimatedContentBytes,
+  );
+  stats.estimatedEmbeddingCostUsd = estimateEmbeddingCostUsd(
+    stats.estimatedEmbeddingTokens,
+  );
+
+  return stats;
+}
+
+/**
+ * Redraw the single-line scan progress indicator on stderr.
+ *
+ * Does nothing unless output is text on a TTY and at least two seconds have
+ * passed since `lastProgressAt`. When it does draw, it first reports the new
+ * timestamp through `setLastProgressAt`.
+ */
+function maybeRenderProgress(
+  options: RunWikipediaImportOptions,
+  stats: WikipediaImportStats,
+  lastProgressAt: number,
+  setLastProgressAt: (timestamp: number) => void,
+): void {
+  const interactive = options.fmt === "text" && process.stderr.isTTY;
+  if (!interactive) return;
+
+  const timestamp = Date.now();
+  const elapsedMs = timestamp - lastProgressAt;
+  if (elapsedMs < 2000) return;
+
+  setLastProgressAt(timestamp);
+  const scanned = formatInteger(stats.pagesScanned);
+  const prepared = formatInteger(stats.memoriesPrepared);
+  const imported = formatInteger(stats.imported);
+  process.stderr.write(
+    `\rScanned ${scanned} pages; prepared ${prepared} article memories; imported ${imported}...`,
+  );
+}
+
+/**
+ * Print the human-readable summary of an import run.
+ *
+ * The headline reports what was imported (or would be, for a dry run) and
+ * mentions updates when there were any. Detail lines follow; counters that
+ * are zero are omitted. At most the first ten errors are listed.
+ *
+ * @param result - statistics returned by `runWikipediaImport`
+ */
+function renderWikipediaImportResult(result: WikipediaImportStats): void {
+  const preparedOrImported = result.dryRun
+    ? result.memoriesPrepared
+    : result.imported;
+  let summary = `${result.dryRun ? "Would import" : "Imported"} ${formatInteger(preparedOrImported)} Wikipedia article ${preparedOrImported === 1 ? "memory" : "memories"}`;
+  if (!result.dryRun && result.imported === 0 && result.updated > 0) {
+    summary = `Updated ${formatInteger(result.updated)} existing Wikipedia article ${result.updated === 1 ? "memory" : "memories"}`;
+  } else if (!result.dryRun && result.updated > 0) {
+    summary = `${summary} and updated ${formatInteger(result.updated)} existing`;
+  }
+  clack.log.success(summary);
+  console.log(`  Wiki: ${result.wikiSlug}`);
+  console.log(`  Format: ${result.dumpFormat}`);
+  console.log(`  Source: ${result.sourcePath}`);
+  console.log(`  Tree root: ${result.treeRoot}`);
+  console.log(`  Pages scanned: ${formatInteger(result.pagesScanned)}`);
+  console.log(
+    `  Article memories prepared: ${formatInteger(result.memoriesPrepared)}`,
+  );
+  if (result.updated > 0) {
+    console.log(`  Updated existing: ${formatInteger(result.updated)}`);
+  }
+  if (result.skipped > 0) {
+    console.log(`  Already existed: ${formatInteger(result.skipped)}`);
+  }
+  if (result.failed > 0) {
+    console.log(`  Failed: ${formatInteger(result.failed)}`);
+  }
+  // Show the line when ANY of the three skip counters is non-zero; the line
+  // prints `empty=` too, so it must not be hidden when only that one is set.
+  const anySkipped =
+    result.redirectsSkipped > 0 ||
+    result.namespaceSkipped > 0 ||
+    result.emptyContentSkipped > 0;
+  if (anySkipped) {
+    console.log(
+      `  Skipped: redirects=${formatInteger(result.redirectsSkipped)}, namespace=${formatInteger(result.namespaceSkipped)}, empty=${formatInteger(result.emptyContentSkipped)}`,
+    );
+  }
+  if (result.contentTruncated > 0) {
+    console.log(`  Truncated: ${formatInteger(result.contentTruncated)}`);
+  }
+  console.log(
+    `  Estimated embedded content: ${formatBytes(result.estimatedContentBytes)} ≈ ${formatInteger(result.estimatedEmbeddingTokens)} tokens (~$${result.estimatedEmbeddingCostUsd.toFixed(2)} with text-embedding-3-small)`,
+  );
+  if (result.errors.length > 0) {
+    console.log(`  Errors: ${result.errors.length}`);
+    for (const error of result.errors.slice(0, 10)) {
+      console.log(`    ${error.source}: ${error.error}`);
+    }
+    if (result.errors.length > 10) {
+      console.log(`    ... ${result.errors.length - 10} more`);
+    }
+  }
+}
+
+/**
+ * Default `--cache-dir`: `memory-engine/wikipedia` under `$XDG_CACHE_HOME`,
+ * or under `~/.cache` when that variable is unset or empty.
+ */
+function defaultWikipediaCacheDir(): string {
+  const xdgCacheHome = process.env.XDG_CACHE_HOME;
+  const cacheRoot = xdgCacheHome ? xdgCacheHome : join(homedir(), ".cache");
+  return join(cacheRoot, "memory-engine", "wikipedia");
+}
+
+/**
+ * Expand a leading `~` or `~/` to the current user's home directory.
+ * Any other path, including the `~user` form, is returned unchanged.
+ */
+function expandHome(path: string): string {
+  const isBareTilde = path === "~";
+  if (!isBareTilde && !path.startsWith("~/")) return path;
+  const home = homedir();
+  return isBareTilde ? home : join(home, path.slice(2));
+}
+
+/** True when `value` starts with an `http://` or `https://` scheme (case-insensitive). */
+function isUrl(value: string): boolean {
+  const matchIndex = value.search(/^https?:\/\//i);
+  return matchIndex === 0;
+}
+
+/**
+ * Lower-case a wiki database name and check it against `VALID_WIKI_SLUG_RE`.
+ *
+ * @returns the lower-cased slug
+ * @throws Error when the slug does not look like a Wikimedia database name
+ */
+function normalizeWikiSlug(value: string): string {
+  const wikiSlug = value.toLowerCase();
+  if (VALID_WIKI_SLUG_RE.test(wikiSlug)) return wikiSlug;
+  throw new Error(
+    `Invalid wiki slug '${wikiSlug}'. Use a Wikimedia database name like enwiki or simplewiki.`,
+  );
+}
+
+/**
+ * Parse an option value as a non-negative decimal integer.
+ *
+ * The whole value must be digits (surrounding whitespace and one leading `+`
+ * are tolerated). Inputs such as `"12abc"`, `"1.5"` or `"1e3"` are rejected
+ * rather than silently read as a different number, which is what
+ * `Number.parseInt` alone would do.
+ *
+ * @param name - option name used in the error message
+ * @param value - raw option value
+ * @throws Error when the value is not a non-negative safe integer
+ */
+function parseNonNegativeInteger(name: string, value: unknown): number {
+  const text = String(value).trim();
+  const parsed = /^\+?\d+$/.test(text) ? Number(text) : Number.NaN;
+  if (!Number.isSafeInteger(parsed) || parsed < 0) {
+    throw new Error(`Invalid ${name}: expected a non-negative integer`);
+  }
+  return parsed;
+}
+
+/**
+ * Parse an option value as a strictly positive decimal integer.
+ *
+ * The whole value must be digits (surrounding whitespace and one leading `+`
+ * are tolerated). Inputs such as `"500x"`, `"2.5"` or `"1e3"` are rejected
+ * rather than silently read as a different number, which is what
+ * `Number.parseInt` alone would do.
+ *
+ * @param name - option name used in the error message
+ * @param value - raw option value
+ * @throws Error when the value is not a positive safe integer
+ */
+function parsePositiveInteger(name: string, value: unknown): number {
+  const text = String(value).trim();
+  const parsed = /^\+?\d+$/.test(text) ? Number(text) : Number.NaN;
+  if (!Number.isSafeInteger(parsed) || parsed <= 0) {
+    throw new Error(`Invalid ${name}: expected a positive integer`);
+  }
+  return parsed;
+}
+
+/** Estimate the token count as one token per four content bytes, rounded up. */
+function estimateEmbeddingTokens(contentBytes: number): number {
+  const bytesPerToken = 4;
+  return Math.ceil(contentBytes / bytesPerToken);
+}
+
+/** Convert a token estimate to US dollars using the per-million-token price constant. */
+function estimateEmbeddingCostUsd(tokens: number): number {
+  const millionsOfTokens = tokens / 1_000_000;
+  return millionsOfTokens * OPENAI_TEXT_EMBEDDING_3_SMALL_USD_PER_MILLION_TOKENS;
+}
+
+/**
+ * Format a byte count with 1024-based units from `B` up to `TB`.
+ *
+ * Plain bytes get no decimals; larger units get one decimal when the scaled
+ * value is at least 10 and two decimals otherwise.
+ */
+function formatBytes(bytes: number): string {
+  const units = ["B", "KB", "MB", "GB", "TB"];
+  const lastUnit = units.length - 1;
+  let scaled = bytes;
+  let unitIndex = 0;
+  for (; scaled >= 1024 && unitIndex < lastUnit; unitIndex++) {
+    scaled /= 1024;
+  }
+
+  let decimals = 0;
+  if (unitIndex !== 0) {
+    decimals = scaled >= 10 ? 1 : 2;
+  }
+  return `${scaled.toFixed(decimals)} ${units[unitIndex]}`;
+}
+
+/** Format a number with `en-US` digit grouping, e.g. `1,234,567`. */
+function formatInteger(value: number): string {
+  return value.toLocaleString("en-US");
+}
diff --git a/packages/cli/index.ts b/packages/cli/index.ts
index 3738174..6a39597 100755
--- a/packages/cli/index.ts
+++ b/packages/cli/index.ts
@@ -30,6 +30,7 @@ import { createUpgradeCommand } from "./commands/upgrade.ts";
 import { createUserCommand } from "./commands/user.ts";
 import { createVersionCommand } from "./commands/version.ts";
 import { createWhoamiCommand } from "./commands/whoami.ts";
+import { createWikipediaCommand } from "./commands/wikipedia.ts";
 import { setExpanded } from "./output.ts";
 
 const SHELLS = ["zsh", "bash", "fish", "powershell"] as const;
@@ -88,6 +89,9 @@ program.addCommand(createCodexCommand());
 // Local web UI
 program.addCommand(createServeCommand());
 
+// Dataset imports
+program.addCommand(createWikipediaCommand());
+
 // Engine-level RBAC commands
 program.addCommand(createUserCommand());
 program.addCommand(createGrantCommand());
diff --git a/packages/cli/wikipedia.test.ts b/packages/cli/wikipedia.test.ts
new file mode 100644
index 0000000..8b16fae
--- /dev/null
+++ b/packages/cli/wikipedia.test.ts
@@ -0,0 +1,197 @@
+import { describe, expect, test } from "bun:test";
+import {
+  buildWikipediaArticleUrl,
+  buildWikipediaDumpUrl,
+  buildWikipediaMemory,
+  cleanWikitextToPlainText,
+  deterministicWikipediaPageUuidV7,
+  extractCategories,
+  inferDumpDateFromDumpName,
+  inferDumpKindFromDumpName,
+  inferWikiSlugFromDumpName,
+  parseMediaWikiPageXml,
+  streamMediaWikiPages,
+} from "./wikipedia.ts";
+
+const UUIDV7_RE =
+  /^[0-9a-f]{8}-[0-9a-f]{4}-7[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i;
+
+describe("Wikipedia dump helpers", () => {
+  test("builds canonical Wikimedia dump URLs", () => {
+    expect(buildWikipediaDumpUrl("simplewiki")).toBe(
+      "https://dumps.wikimedia.org/simplewiki/latest/simplewiki-latest-pages-articles-multistream.xml.bz2",
+    );
+    expect(buildWikipediaDumpUrl("enwiki", "20260501", "pages-articles")).toBe(
+      "https://dumps.wikimedia.org/enwiki/20260501/enwiki-20260501-pages-articles.xml.bz2",
+    );
+  });
+
+  test("infers wiki slug and date from dump names", () => {
+    const name = "enwiki-20260501-pages-articles-multistream.xml.bz2";
+    expect(inferWikiSlugFromDumpName(name)).toBe("enwiki");
+    expect(inferDumpDateFromDumpName(name)).toBe("20260501");
+    expect(inferDumpKindFromDumpName(name)).toBe("pages-articles-multistream");
+  });
+
+  test("parses one MediaWiki page XML block", () => {
+    const page = parseMediaWikiPageXml(`
+<page>
+  <title>PostgreSQL</title>
+  <ns>0</ns>
+  <id>23456</id>
+  <revision>
+    <id>98765</id>
+    <timestamp>2026-05-01T12:34:56Z</timestamp>
+    <model>wikitext</model>
+    <format>text/x-wiki</format>
+    <text bytes="42">'''PostgreSQL''' is an [[open-source]] database &amp; server.</text>
+    <sha1>abc123</sha1>
+  </revision>
+</page>`);
+
+    expect(page).toEqual({
+      title: "PostgreSQL",
+      namespace: 0,
+      pageId: "23456",
+      revisionId: "98765",
+      timestamp: "2026-05-01T12:34:56Z",
+      text: "'''PostgreSQL''' is an [[open-source]] database & server.",
+      redirectTitle: undefined,
+      model: "wikitext",
+      format: "text/x-wiki",
+      sha1: "abc123",
+      textBytes: 42,
+    });
+  });
+
+  test("streams pages across chunk boundaries", async () => {
+    const xml = `<mediawiki><page><title>A</title><ns>0</ns><id>1</id><revision><id>11</id><text>Alpha</text></revision></page><page><title>B</title><ns>0</ns><id>2</id><revision><id>22</id><text>Beta</text></revision></page></mediawiki>`;
+    const encoder = new TextEncoder();
+    const stream = new ReadableStream<Uint8Array>({
+      start(controller) {
+        controller.enqueue(encoder.encode(xml.slice(0, 75)));
+        controller.enqueue(encoder.encode(xml.slice(75)));
+        controller.close();
+      },
+    });
+
+    const pages = [];
+    for await (const page of streamMediaWikiPages(stream)) {
+      pages.push(page.title);
+    }
+
+    expect(pages).toEqual(["A", "B"]);
+  });
+
+  test("extracts categories", () => {
+    expect(
+      extractCategories(
+        "[[Category:Relational databases]] [[Category:Free software|Databases]] [[category:Relational databases]]",
+      ),
+    ).toEqual(["Relational databases", "Free software"]);
+  });
+
+  test("cleans wikitext into readable text", () => {
+    const cleaned = cleanWikitextToPlainText(`{{Infobox}}
+[[File:Fan.jpg|thumb|A [[fan]] moves air.]]
+'''PostgreSQL''' is an [[open-source software|open-source]] [[database]].<ref>noise</ref>
+
+== History ==
+* Created at [https://example.com Berkeley]
+
+== References ==
+* [https://example.com]
+
+== Empty section ==
+[[Category:Relational databases]]`);
+
+    expect(cleaned).toContain("PostgreSQL is an open-source database.");
+    expect(cleaned).toContain("## History");
+    expect(cleaned).toContain("- Created at Berkeley");
+    expect(cleaned).not.toContain("Infobox");
+    expect(cleaned).not.toContain("moves air");
+    expect(cleaned).not.toContain("Category");
+    expect(cleaned).not.toContain("References");
+    expect(cleaned).not.toContain("Empty section");
+    expect(cleaned).not.toContain("\n-\n");
+    expect(cleaned).not.toContain("noise");
+    expect(cleaned).not.toContain("]]");
+  });
+
+  test("builds memory payload with stable metadata", () => {
+    const page = parseMediaWikiPageXml(`
+<page>
+  <title>PostgreSQL</title>
+  <ns>0</ns>
+  <id>23456</id>
+  <revision>
+    <id>98765</id>
+    <timestamp>2026-05-01T12:34:56Z</timestamp>
+    <model>wikitext</model>
+    <format>text/x-wiki</format>
+    <text bytes="120">'''PostgreSQL''' is an [[open-source software|open-source]] database. [[Category:Relational databases]]</text>
+    <sha1>abc123</sha1>
+  </revision>
+</page>`);
+    expect(page).not.toBeNull();
+
+    const built = buildWikipediaMemory(page!, {
+      wikiSlug: "enwiki",
+      treeRoot: "wikipedia",
+      contentMode: "plain",
+      sourceDumpPath: "/tmp/enwiki-latest-pages-articles-multistream.xml.bz2",
+      sourceDumpUrl:
+        "https://dumps.wikimedia.org/enwiki/latest/enwiki-latest-pages-articles-multistream.xml.bz2",
+      sourceDumpDate: "latest",
+      sourceDumpKind: "pages-articles-multistream",
+      importedAt: "2026-05-07T00:00:00.000Z",
+    });
+
+    expect(built).not.toBeNull();
+    expect(built!.memory.id).toMatch(UUIDV7_RE);
+    expect(built!.memory.tree).toBe("wikipedia.relational_databases");
+    expect(built!.memory.content).toContain(
+      "# PostgreSQL\n\nPostgreSQL is an open-source database.",
+    );
+    expect(built!.memory.meta).toMatchObject({
+      type: "wikipedia_article",
+      source: "wikipedia",
+      source_wiki: "enwiki",
+      source_page_id: "23456",
+      source_revision_id: "98765",
+      source_title: "PostgreSQL",
+      source_url: "https://en.wikipedia.org/wiki/PostgreSQL",
+      source_format: "mediawiki_xml",
+      content_format: "plain_text",
+      categories: ["Relational databases"],
+      primary_category: "Relational databases",
+      primary_category_slug: "relational_databases",
+      article_slug: "postgresql",
+      imported_at: "2026-05-07T00:00:00.000Z",
+      importer_version: "1",
+    });
+    expect(built!.memory.temporal).toEqual({
+      start: "2026-05-01T12:34:56.000Z",
+    });
+  });
+
+  test("deterministic page ids are stable and page-keyed", () => {
+    const first = deterministicWikipediaPageUuidV7("enwiki", "23456");
+    const second = deterministicWikipediaPageUuidV7("enwiki", "23456");
+    const differentWiki = deterministicWikipediaPageUuidV7(
+      "simplewiki",
+      "23456",
+    );
+
+    expect(first).toBe(second);
+    expect(first).toMatch(UUIDV7_RE);
+    expect(differentWiki).not.toBe(first);
+  });
+
+  test("builds article URLs", () => {
+    // Spaces become underscores and reserved characters such as "/" are
+    // percent-encoded by the URL builder.
+    expect(buildWikipediaArticleUrl("enwiki", "A/B test")).toBe(
+      "https://en.wikipedia.org/wiki/A%2FB_test",
+    );
+    // The host is derived from the wiki slug by dropping the "wiki" suffix.
+    expect(buildWikipediaArticleUrl("simplewiki", "Fan")).toBe(
+      "https://simple.wikipedia.org/wiki/Fan",
+    );
+  });
+});
diff --git a/packages/cli/wikipedia.ts b/packages/cli/wikipedia.ts
new file mode 100644
index 0000000..4fe11a4
--- /dev/null
+++ b/packages/cli/wikipedia.ts
@@ -0,0 +1,823 @@
+/**
+ * Wikipedia dump import helpers.
+ *
+ * Wikimedia's public database dumps for article text are MediaWiki XML export
+ * files, most commonly distributed as bzip2 archives named like:
+ *
+ *   enwiki-latest-pages-articles-multistream.xml.bz2
+ *
+ * The "multistream" suffix means the .bz2 file is composed of multiple bzip2
+ * streams plus a companion index file. For a sequential import we can treat it
+ * as a normal bzip2-compressed XML file and stream-decompress it.
+ */
+
+import { spawn } from "node:child_process";
+import { createHash } from "node:crypto";
+import { once } from "node:events";
+import {
+  createWriteStream,
+  existsSync,
+  mkdirSync,
+  renameSync,
+  unlinkSync,
+} from "node:fs";
+import { dirname } from "node:path";
+import { Readable } from "node:stream";
+import type { MemoryCreateParams } from "@memory.build/protocol/engine";
+import { normalizeSlug } from "./importers/slug.ts";
+
+/** Wiki database name used when the caller does not specify one. */
+export const DEFAULT_WIKIPEDIA_WIKI = "simplewiki";
+/** Dump date path segment used by default when building dump URLs. */
+export const DEFAULT_WIKIPEDIA_DUMP_DATE = "latest";
+/** Dump kind used by default when building dump URLs. */
+export const DEFAULT_WIKIPEDIA_DUMP_KIND = "pages-articles-multistream";
+/** Human-readable description of the dump format this importer reads. */
+export const WIKIPEDIA_DUMP_FORMAT =
+  "MediaWiki XML export (usually bzip2-compressed .xml.bz2)";
+/** Version string stamped into each memory's `importer_version` metadata. */
+export const WIKIPEDIA_IMPORTER_VERSION = "1";
+
+// 2001-01-15 UTC in epoch milliseconds; used as the fixed timestamp field of
+// the deterministic UUIDv7 ids so they do not depend on import time.
+const WIKIPEDIA_LAUNCH_TIMESTAMP_MS = Date.UTC(2001, 0, 15);
+
+/** Whether memory content is cleaned plain text or the raw wikitext. */
+export type WikipediaContentMode = "plain" | "wikitext";
+
+/** One `<page>` element parsed from a MediaWiki XML export. */
+export interface WikipediaPage {
+  /** Entity-decoded contents of `<title>`. */
+  title: string;
+  /** Integer parsed from `<ns>`. */
+  namespace: number;
+  /** Contents of the page-level `<id>`, kept as a string. */
+  pageId: string;
+  /** Contents of the revision's `<id>`; empty string when absent. */
+  revisionId: string;
+  /** Contents of the revision's `<timestamp>`, when present. */
+  timestamp?: string;
+  /** Entity-decoded revision text; empty string when there is no text body. */
+  text: string;
+  /** `title` attribute of a `<redirect>` element, when present. */
+  redirectTitle?: string;
+  /** Contents of the revision's `<model>`, when present. */
+  model?: string;
+  /** Contents of the revision's `<format>`, when present. */
+  format?: string;
+  /** Contents of the revision's `<sha1>`, when present. */
+  sha1?: string;
+  /** Integer parsed from the `bytes` attribute of `<text>`, when finite. */
+  textBytes?: number;
+}
+
+/** Inputs that control how a parsed page becomes a memory payload. */
+export interface WikipediaMemoryBuildOptions {
+  /** Wiki database name; feeds the article URL, the memory id and metadata. */
+  wikiSlug: string;
+  /** Prefix of the memory's tree path; the category slug is appended. */
+  treeRoot: string;
+  /** Selects cleaned plain text or raw wikitext as the memory body. */
+  contentMode: WikipediaContentMode;
+  /** Copied into metadata as `source_dump_path` when set. */
+  sourceDumpPath?: string;
+  /** Copied into metadata as `source_dump_url` when set. */
+  sourceDumpUrl?: string;
+  /** Copied into metadata as `source_dump_date` when set. */
+  sourceDumpDate?: string;
+  /** Copied into metadata as `source_dump_kind` when set. */
+  sourceDumpKind?: string;
+  /** Stored verbatim in metadata as `imported_at`. */
+  importedAt: string;
+  /** UTF-8 byte budget for content; undefined or <= 0 disables truncation. */
+  maxContentBytes?: number;
+}
+
+/** Result of building a memory from a page, plus bookkeeping for callers. */
+export interface BuiltWikipediaMemory {
+  /** Payload ready to be sent as a memory create request. */
+  memory: MemoryCreateParams;
+  /** Categories extracted from the page's raw wikitext. */
+  categories: string[];
+  /** True when the content was cut to fit `maxContentBytes`. */
+  truncated: boolean;
+  /** UTF-8 byte length of the final content. */
+  contentBytes: number;
+  /** Slug derived from the page title. */
+  articleSlug: string;
+}
+
+/** Outcome of `downloadFile`. */
+export interface DownloadFileResult {
+  /** Destination path of the file on disk. */
+  path: string;
+  /** False when an existing file was reused instead of downloading. */
+  downloaded: boolean;
+  /** Bytes received, or the existing file's size when it was reused. */
+  bytesDownloaded: number;
+  /** Size from the content-length header (or the reused file), if known. */
+  totalBytes?: number;
+}
+
+/** Optional behavior switches for `downloadFile`. */
+export interface DownloadFileOptions {
+  /** Download even when the destination file already exists. */
+  force?: boolean;
+  /** Called (and awaited) after each received chunk has been written. */
+  onProgress?: (progress: {
+    bytesDownloaded: number;
+    totalBytes?: number;
+  }) => void | Promise<void>;
+}
+
+/** Handle returned by `openDumpTextStream`. */
+export interface OpenedDumpTextStream {
+  /** Decompressed (or plain) dump bytes. */
+  stream: ReadableStream<Uint8Array>;
+  /**
+   * Settles when the decompressor process closes; already resolved for
+   * uncompressed files.
+   */
+  completion: Promise<void>;
+  /** Asks the decompressor to stop; a no-op for uncompressed files. */
+  close: () => void;
+}
+
+/**
+ * Compose the dumps.wikimedia.org download URL for a wiki database name.
+ *
+ * @param wikiSlug Wiki database name such as `enwiki`.
+ * @param dumpDate Dump date path segment; defaults to the module default.
+ * @param dumpKind Dump kind; defaults to the module default.
+ * @returns URL of the `.xml.bz2` archive.
+ */
+export function buildWikipediaDumpUrl(
+  wikiSlug: string,
+  dumpDate = DEFAULT_WIKIPEDIA_DUMP_DATE,
+  dumpKind = DEFAULT_WIKIPEDIA_DUMP_KIND,
+): string {
+  const directory = `https://dumps.wikimedia.org/${wikiSlug}/${dumpDate}`;
+  const archiveName = `${wikiSlug}-${dumpDate}-${dumpKind}.xml.bz2`;
+  return `${directory}/${archiveName}`;
+}
+
+/**
+ * Read the wiki database name (e.g. `enwiki`) off a standard dump filename.
+ *
+ * @returns The lower-cased slug, or undefined when the name does not follow
+ *   the `<wiki>-<latest|YYYYMMDD>-<kind>` shape.
+ */
+export function inferWikiSlugFromDumpName(
+  fileName: string,
+): string | undefined {
+  const parsed = fileName.match(/^([a-z0-9_]+)-(?:latest|\d{8})-[^.]+/i);
+  if (!parsed) return undefined;
+  return parsed[1]?.toLowerCase();
+}
+
+/**
+ * Read the dump date (`latest` or an eight-digit date) off a dump filename.
+ *
+ * @returns The lower-cased date segment, or undefined when the name does not
+ *   follow the `<wiki>-<latest|YYYYMMDD>-<kind>` shape.
+ */
+export function inferDumpDateFromDumpName(
+  fileName: string,
+): string | undefined {
+  const parsed = fileName.match(/^[a-z0-9_]+-((?:latest|\d{8}))-[^.]+/i);
+  if (!parsed) return undefined;
+  return parsed[1]?.toLowerCase();
+}
+
+/**
+ * Read the dump kind (e.g. `pages-articles-multistream`) off a dump filename.
+ *
+ * @returns The lower-cased kind, or undefined unless the name ends in `.xml`
+ *   or `.xml.bz2` and follows the standard `<wiki>-<date>-<kind>` shape.
+ */
+export function inferDumpKindFromDumpName(
+  fileName: string,
+): string | undefined {
+  const pattern = /^[a-z0-9_]+-(?:latest|\d{8})-(.+?)\.xml(?:\.bz2)?$/i;
+  const parsed = fileName.match(pattern);
+  if (!parsed) return undefined;
+  return parsed[1]?.toLowerCase();
+}
+
+/**
+ * Download a URL to disk using a streaming response body.
+ *
+ * Data is written to `<destinationPath>.part` and renamed into place only
+ * after the write stream has finished, so a failed download never leaves a
+ * partial file at `destinationPath`. On any failure the response reader is
+ * cancelled and the temporary file is removed (best effort).
+ *
+ * @param url Source URL fetched with `fetch`.
+ * @param destinationPath Final path of the downloaded file.
+ * @param options `force` re-downloads over an existing file; `onProgress` is
+ *   awaited after each chunk.
+ * @returns Where the file is, whether a download happened, and byte counts.
+ * @throws Error on a non-OK HTTP status or an empty body; rethrows network,
+ *   write, rename and `onProgress` errors.
+ */
+export async function downloadFile(
+  url: string,
+  destinationPath: string,
+  options: DownloadFileOptions = {},
+): Promise<DownloadFileResult> {
+  if (existsSync(destinationPath) && !options.force) {
+    const size = await Bun.file(destinationPath).size;
+    return {
+      path: destinationPath,
+      downloaded: false,
+      bytesDownloaded: size,
+      totalBytes: size,
+    };
+  }
+
+  mkdirSync(dirname(destinationPath), { recursive: true });
+  const temporaryPath = `${destinationPath}.part`;
+  try {
+    if (existsSync(temporaryPath)) unlinkSync(temporaryPath);
+  } catch {
+    // Best effort cleanup; createWriteStream will surface a real error below.
+  }
+
+  const response = await fetch(url);
+  if (!response.ok) {
+    // Release the connection before reporting the failure.
+    await response.body?.cancel().catch(() => {});
+    throw new Error(`Failed to download ${url}: HTTP ${response.status}`);
+  }
+  if (!response.body) {
+    throw new Error(`Failed to download ${url}: empty response body`);
+  }
+
+  // A missing or malformed content-length is reported as "unknown" rather
+  // than leaking NaN to progress callbacks.
+  const totalHeader = response.headers.get("content-length");
+  const parsedTotal = totalHeader
+    ? Number.parseInt(totalHeader, 10)
+    : Number.NaN;
+  const totalBytes =
+    Number.isFinite(parsedTotal) && parsedTotal >= 0 ? parsedTotal : undefined;
+
+  const output = createWriteStream(temporaryPath);
+  // Keep a permanent listener so a write error that fires while nothing is
+  // awaiting the stream is recorded instead of becoming an unhandled event.
+  let writeError: unknown;
+  output.on("error", (error) => {
+    writeError ??= error;
+  });
+  const reader = response.body.getReader();
+  let bytesDownloaded = 0;
+
+  try {
+    while (true) {
+      const { value, done } = await reader.read();
+      if (done) break;
+      if (writeError !== undefined) throw writeError;
+      bytesDownloaded += value.byteLength;
+      if (!output.write(Buffer.from(value))) {
+        // Respect backpressure; `once` rejects if the stream errors meanwhile.
+        await once(output, "drain");
+      }
+      await options.onProgress?.({ bytesDownloaded, totalBytes });
+    }
+
+    if (writeError !== undefined) throw writeError;
+    output.end();
+    await once(output, "finish");
+    renameSync(temporaryPath, destinationPath);
+  } catch (error) {
+    await reader.cancel().catch(() => {});
+    output.destroy();
+    try {
+      unlinkSync(temporaryPath);
+    } catch {
+      // Ignore cleanup failures.
+    }
+    throw error;
+  }
+
+  return {
+    path: destinationPath,
+    downloaded: true,
+    bytesDownloaded,
+    totalBytes,
+  };
+}
+
+/**
+ * Open an XML or XML.bz2 dump as a UTF-8 byte stream.
+ *
+ * Bun/Node do not ship a native bzip2 decoder, so compressed Wikimedia dumps
+ * are decompressed by invoking an installed bzip2-compatible CLI. We prefer
+ * parallel implementations when present, then fall back to the ubiquitous
+ * `bzip2 -dc`.
+ *
+ * @param dumpPath Path to the dump. A case-insensitive `.bz2` suffix selects
+ *   the external decompressor; any other path is streamed from disk as-is.
+ * @returns The byte stream, a `completion` promise tied to the decompressor
+ *   process (already resolved for plain files), and a `close` callback.
+ * @throws Error when no decompressor command is found or the child process
+ *   has no stdout.
+ */
+export function openDumpTextStream(dumpPath: string): OpenedDumpTextStream {
+  if (!dumpPath.toLowerCase().endsWith(".bz2")) {
+    return {
+      stream: Bun.file(dumpPath).stream() as ReadableStream<Uint8Array>,
+      completion: Promise.resolve(),
+      close: () => {},
+    };
+  }
+
+  const decompressor = findBzip2Decompressor();
+  if (!decompressor) {
+    throw new Error(
+      "No bzip2 decompressor found. Install bzip2, lbzip2, pbzip2, or bzcat to read Wikipedia .xml.bz2 dumps.",
+    );
+  }
+
+  // bzcat is invoked with just the path; the other tools get `-dc`.
+  const args = decompressor === "bzcat" ? [dumpPath] : ["-dc", dumpPath];
+  const child = spawn(decompressor, args, {
+    stdio: ["ignore", "pipe", "pipe"],
+  });
+
+  if (!child.stdout) {
+    throw new Error(`Failed to open ${decompressor} stdout`);
+  }
+
+  // Keep only the last 4096 characters of stderr for error reporting.
+  let stderr = "";
+  child.stderr?.setEncoding("utf8");
+  child.stderr?.on("data", (chunk: string) => {
+    stderr = `${stderr}${chunk}`.slice(-4096);
+  });
+
+  let closeRequested = false;
+  const completion = new Promise<void>((resolve, reject) => {
+    child.on("error", reject);
+    child.on("close", (code, signal) => {
+      const detail = stderr.trim() ? `: ${stderr.trim()}` : "";
+      // An exit caused by the consumer stopping early (explicit close(),
+      // SIGTERM, or a broken-pipe message on stderr) is not a failure.
+      const stoppedByConsumer =
+        closeRequested || signal === "SIGTERM" || /broken pipe/i.test(stderr);
+      if (code === 0 || stoppedByConsumer) {
+        resolve();
+        return;
+      }
+      reject(
+        new Error(
+          `${decompressor} exited ${signal ? `with signal ${signal}` : `with code ${code}`}${detail}`,
+        ),
+      );
+    });
+  });
+
+  return {
+    stream: Readable.toWeb(child.stdout) as ReadableStream<Uint8Array>,
+    completion,
+    close: () => {
+      // Mark the stop as intentional before signalling the child so the
+      // close handler resolves instead of rejecting.
+      closeRequested = true;
+      if (!child.killed) child.kill("SIGTERM");
+    },
+  };
+}
+
+/**
+ * Pick the first available bzip2-compatible command, trying `lbzip2`,
+ * `pbzip2`, `bzip2` and `bzcat` in that order.
+ *
+ * @returns The command name, or undefined when `Bun.which` finds none.
+ */
+function findBzip2Decompressor(): string | undefined {
+  const candidates = ["lbzip2", "pbzip2", "bzip2", "bzcat"];
+  return candidates.find((candidate) => Bun.which(candidate));
+}
+
+/**
+ * Incrementally yield parsed pages from a decompressed dump stream.
+ *
+ * Bytes are decoded as they arrive and appended to a pending-text buffer;
+ * after every chunk (and once more after the final decoder flush) all
+ * complete `<page>` blocks are drained from that buffer. The reader is always
+ * cancelled on exit, including when the consumer stops iterating early.
+ */
+export async function* streamMediaWikiPages(
+  stream: ReadableStream<Uint8Array>,
+): AsyncGenerator<WikipediaPage> {
+  const reader = stream.getReader();
+  const decoder = new TextDecoder();
+  let pending = "";
+  const readPending = (): string => pending;
+  const writePending = (next: string): void => {
+    pending = next;
+  };
+
+  try {
+    let exhausted = false;
+    while (!exhausted) {
+      const chunk = await reader.read();
+      if (chunk.done) {
+        // Flush any bytes the streaming decoder was still holding.
+        pending += decoder.decode();
+        exhausted = true;
+      } else {
+        pending += decoder.decode(chunk.value, { stream: true });
+      }
+
+      yield* drainCompletePagesFromBuffer(readPending, writePending);
+    }
+  } finally {
+    await reader.cancel().catch(() => {});
+  }
+}
+
+/**
+ * Yield every complete `<page>...</page>` block currently in the buffer.
+ *
+ * The buffer is read once through `getBuffer`; the unconsumed remainder is
+ * handed back through `setBuffer` just before the generator returns.
+ */
+function* drainCompletePagesFromBuffer(
+  getBuffer: () => string,
+  setBuffer: (next: string) => void,
+): Generator<WikipediaPage> {
+  const openTag = "<page>";
+  const closeTag = "</page>";
+  let remaining = getBuffer();
+
+  for (;;) {
+    const openAt = remaining.indexOf(openTag);
+    if (openAt < 0) {
+      // Keep only a small suffix in case '<page>' is split across chunks.
+      setBuffer(remaining.slice(-16));
+      return;
+    }
+    if (openAt > 0) remaining = remaining.slice(openAt);
+
+    const closeAt = remaining.indexOf(closeTag);
+    if (closeAt < 0) {
+      // Page is still incomplete; keep everything from its opening tag.
+      setBuffer(remaining);
+      return;
+    }
+
+    const pageEnd = closeAt + closeTag.length;
+    const pageXml = remaining.slice(0, pageEnd);
+    remaining = remaining.slice(pageEnd);
+    const parsed = parseMediaWikiPageXml(pageXml);
+    if (parsed) yield parsed;
+  }
+}
+
+/**
+ * Parse one <page>...</page> block from a MediaWiki XML export.
+ *
+ * Uses regular-expression extraction rather than a full XML parser. Everything
+ * before the first `<revision>` is treated as the page header; the rest is the
+ * revision.
+ *
+ * @param pageXml Text of a single page element.
+ * @returns The parsed page, or null when title, namespace or page id is
+ *   missing/empty or the namespace is not an integer.
+ */
+export function parseMediaWikiPageXml(pageXml: string): WikipediaPage | null {
+  // Split header from revision so the page <id> and revision <id> are not
+  // confused with each other.
+  const revisionStart = pageXml.indexOf("<revision>");
+  const pageHeaderXml =
+    revisionStart === -1 ? pageXml : pageXml.slice(0, revisionStart);
+  const revisionXml = revisionStart === -1 ? "" : pageXml.slice(revisionStart);
+
+  const title = extractXmlTagText(pageHeaderXml, "title");
+  const namespaceText = extractXmlTagText(pageHeaderXml, "ns");
+  const pageId = extractXmlTagText(pageHeaderXml, "id");
+  const revisionId = extractXmlTagText(revisionXml, "id") ?? "";
+  if (!title || !namespaceText || !pageId) return null;
+
+  const namespace = Number.parseInt(namespaceText, 10);
+  if (Number.isNaN(namespace)) return null;
+
+  const redirectMatch = /<redirect\b([^>]*)\/?\s*>/i.exec(pageHeaderXml);
+  const redirectTitle = redirectMatch
+    ? extractXmlAttribute(redirectMatch[1] ?? "", "title")
+    : undefined;
+
+  // <text> may have a body or be self-closing; attributes are read from
+  // whichever form matched, and the body defaults to an empty string.
+  const textMatch = /<text\b([^>]*)>([\s\S]*?)<\/text>/i.exec(revisionXml);
+  const selfClosingTextMatch = /<text\b([^>]*)\/\s*>/i.exec(revisionXml);
+  const textAttributes = textMatch?.[1] ?? selfClosingTextMatch?.[1] ?? "";
+  const text = textMatch ? decodeXmlEntities(textMatch[2] ?? "") : "";
+  const textBytesRaw = extractXmlAttribute(textAttributes, "bytes");
+  const textBytes = textBytesRaw
+    ? Number.parseInt(textBytesRaw, 10)
+    : undefined;
+
+  return {
+    title,
+    namespace,
+    pageId,
+    revisionId,
+    timestamp: extractXmlTagText(revisionXml, "timestamp") ?? undefined,
+    text,
+    redirectTitle,
+    model: extractXmlTagText(revisionXml, "model") ?? undefined,
+    format: extractXmlTagText(revisionXml, "format") ?? undefined,
+    sha1: extractXmlTagText(revisionXml, "sha1") ?? undefined,
+    // Drops NaN from an unparsable `bytes` attribute.
+    textBytes: Number.isFinite(textBytes) ? textBytes : undefined,
+  };
+}
+
+/**
+ * Return the entity-decoded body of the first `<tagName>...</tagName>`
+ * element in `xml` (case-insensitive), or null when there is none.
+ */
+function extractXmlTagText(xml: string, tagName: string): string | null {
+  const pattern = new RegExp(
+    `<${tagName}\\b[^>]*>([\\s\\S]*?)<\\/${tagName}>`,
+    "i",
+  );
+  const found = pattern.exec(xml);
+  if (!found) return null;
+  return decodeXmlEntities(found[1] ?? "");
+}
+
+/**
+ * Look up an attribute value in a raw attribute string.
+ *
+ * Double-quoted values are tried first, then single-quoted ones; the match
+ * is case-insensitive and the value is entity-decoded.
+ *
+ * @returns The decoded value, or undefined when the attribute is absent.
+ */
+function extractXmlAttribute(
+  attributes: string,
+  name: string,
+): string | undefined {
+  for (const quote of ['"', "'"]) {
+    const pattern = new RegExp(`${name}=${quote}([^${quote}]*)${quote}`, "i");
+    const found = pattern.exec(attributes);
+    if (found) return decodeXmlEntities(found[1] ?? "");
+  }
+  return undefined;
+}
+
+/**
+ * Decode XML entities plus common HTML entities that survive wikitext cleanup.
+ *
+ * Handles decimal (`&#65;`) and hexadecimal (`&#x41;`) character references
+ * and a small table of named entities (matched case-insensitively). Anything
+ * that cannot be decoded is left in the output unchanged: unknown names, and
+ * numeric references that are not valid Unicode scalar values (above U+10FFFF
+ * or inside the surrogate range). Decoding is a single pass, so text produced
+ * by one replacement is not decoded again.
+ *
+ * @param value Text that may contain entity references.
+ * @returns The text with recognised entities replaced.
+ */
+export function decodeXmlEntities(value: string): string {
+  const namedEntities: Record<string, string> = {
+    amp: "&",
+    lt: "<",
+    gt: ">",
+    quot: '"',
+    apos: "'",
+    nbsp: " ",
+    ndash: "–",
+    mdash: "—",
+  };
+
+  // String.fromCodePoint throws RangeError outside 0..0x10FFFF, so the range
+  // has to be checked explicitly; a finite-number check is not enough.
+  const decodeCodePoint = (codePoint: number, entity: string): string => {
+    const isScalarValue =
+      Number.isInteger(codePoint) &&
+      codePoint >= 0 &&
+      codePoint <= 0x10ffff &&
+      !(codePoint >= 0xd800 && codePoint <= 0xdfff);
+    return isScalarValue ? String.fromCodePoint(codePoint) : entity;
+  };
+
+  return value.replace(
+    /&(#x[0-9a-fA-F]+|#[0-9]+|[A-Za-z][A-Za-z0-9]+);/g,
+    (entity, body: string) => {
+      if (body.startsWith("#x")) {
+        return decodeCodePoint(Number.parseInt(body.slice(2), 16), entity);
+      }
+      if (body.startsWith("#")) {
+        return decodeCodePoint(Number.parseInt(body.slice(1), 10), entity);
+      }
+      return namedEntities[body.toLowerCase()] ?? entity;
+    },
+  );
+}
+
+/**
+ * Collect category names from `[[Category:...]]` links in raw wikitext.
+ *
+ * Names are entity-decoded, underscores become spaces, whitespace runs are
+ * collapsed, and duplicates are dropped case-insensitively, keeping the first
+ * spelling seen. Any `#fragment` or `|sort key` part of the link is ignored.
+ *
+ * @returns Category names in order of first appearance.
+ */
+export function extractCategories(wikitext: string): string[] {
+  const categoryLink =
+    /\[\[\s*Category\s*:\s*([^\]|#]+)(?:#[^\]|]*)?(?:\|[^\]]*)?\]\]/gi;
+  const firstSpellingByKey = new Map<string, string>();
+
+  for (const [, rawName = ""] of wikitext.matchAll(categoryLink)) {
+    const name = decodeXmlEntities(rawName)
+      .replace(/_/g, " ")
+      .replace(/\s+/g, " ")
+      .trim();
+    if (!name) continue;
+
+    const key = name.toLowerCase();
+    if (!firstSpellingByKey.has(key)) firstSpellingByKey.set(key, name);
+  }
+
+  return [...firstSpellingByKey.values()];
+}
+
+/**
+ * Lightweight wikitext-to-plain-text conversion.
+ *
+ * This intentionally favors speed and predictable memory use over perfect
+ * MediaWiki rendering. It removes high-noise constructs (templates, refs,
+ * tables, files, categories) and keeps readable article prose plus headings.
+ *
+ * The steps are order-sensitive: block-level noise is removed first, headings
+ * are tagged with a sentinel before list markers are rewritten, and empty
+ * list items/sections are pruned last.
+ *
+ * @param wikitext Raw article wikitext.
+ * @returns Cleaned text with markdown-style headings and list markers.
+ */
+export function cleanWikitextToPlainText(wikitext: string): string {
+  let text = wikitext;
+
+  // Comments, references and extension tags whose bodies are not prose.
+  text = text.replace(/<!--([\s\S]*?)-->/g, "");
+  text = text.replace(/<ref\b[^>]*\/>/gi, "");
+  text = text.replace(/<ref\b[^>]*>[\s\S]*?<\/ref>/gi, "");
+  text = text.replace(/<references\b[^>]*\/>/gi, "");
+  text = text.replace(/<gallery\b[^>]*>[\s\S]*?<\/gallery>/gi, "");
+  text = text.replace(/<timeline\b[^>]*>[\s\S]*?<\/timeline>/gi, "");
+  text = text.replace(/<score\b[^>]*>[\s\S]*?<\/score>/gi, "");
+  text = text.replace(/<math\b[^>]*>[\s\S]*?<\/math>/gi, "");
+
+  text = stripWikiTables(text);
+  text = stripBalancedTemplates(text);
+
+  // Drop file/image links and category declarations before generic link cleanup.
+  // File captions often contain nested links, so this must be balanced instead
+  // of a single regex; otherwise captions leak through as `...]]` fragments.
+  text = stripWikiLinksByNamespace(text, ["file", "image", "category"]);
+
+  // Convert `== Heading ==` lines to markdown headings, prefixed with a
+  // private-use sentinel so the list-marker pass below does not mistake the
+  // leading `#` characters for a numbered-list marker.
+  const headingSentinel = "\uE000";
+  text = text.replace(
+    /^(={2,6})\s*(.*?)\s*\1\s*$/gm,
+    (_match, marker: string, heading: string) => {
+      const markdownLevel = Math.min(marker.length, 6);
+      return `${headingSentinel}${"#".repeat(markdownLevel)} ${heading.trim()}`;
+    },
+  );
+
+  // Bold (''') must be unwrapped before italic ('').
+  text = text.replace(/'''([^'].*?)'''/g, "$1");
+  text = text.replace(/''([^'].*?)''/g, "$1");
+
+  // External links: keep labels, remove bare URLs.
+  text = text.replace(/\[https?:\/\/[^\s\]]+\s+([^\]]+)\]/gi, "$1");
+  text = text.replace(/\[https?:\/\/[^\]]+\]/gi, "");
+
+  // Internal links: [[Target|label]] -> label, [[Target]] -> Target.
+  text = text.replace(/\[\[([^[\]\n]+?)\]\]/g, (_match, linkBody: string) => {
+    const parts = linkBody.split("|");
+    const target = (parts[0] ?? "").trim().replace(/^:/, "");
+    if (/^(?:category|file|image):/i.test(target)) return "";
+    const label = (parts.length > 1 ? parts[parts.length - 1] : target) ?? "";
+    return label.replace(/_/g, " ").replace(/^:/, "").trim();
+  });
+
+  // Turn line-breaking HTML into newlines, drop all other tags, then decode
+  // entities that were part of the wikitext itself.
+  text = text.replace(/<br\s*\/?\s*>/gi, "\n");
+  text = text.replace(/<\/(?:p|div|section)>/gi, "\n\n");
+  text = text.replace(/<[^>]+>/g, "");
+  text = decodeXmlEntities(text);
+
+  // Wikitext list markers to readable plain/markdown-ish markers.
+  text = text
+    .split("\n")
+    .map((line) => {
+      const trimmed = line.trimEnd();
+      // Heading lines: strip the sentinel and leave the markdown heading alone.
+      if (trimmed.startsWith(headingSentinel)) return trimmed.slice(1);
+      if (/^\*+\s*/.test(trimmed)) return trimmed.replace(/^\*+\s*/, "- ");
+      if (/^#+\s*/.test(trimmed)) return trimmed.replace(/^#+\s*/, "1. ");
+      if (/^[;:]+\s*/.test(trimmed)) return trimmed.replace(/^[;:]+\s*/, "");
+      return trimmed;
+    })
+    .join("\n");
+
+  // Remove lingering table row syntax and magic words.
+  text = text.replace(/^\s*(?:\|-|[|!])[^\n]*$/gm, "");
+  text = text.replace(/__[A-Z_]+__/g, "");
+
+  // Normalise whitespace: single spaces, no trailing spaces, at most one
+  // blank line between paragraphs.
+  const cleaned = text
+    .replace(/[ \t]+/g, " ")
+    .replace(/[ \t]+\n/g, "\n")
+    .replace(/\n{3,}/g, "\n\n")
+    .trim();
+
+  return removeEmptyListItemsAndSections(cleaned);
+}
+
+/**
+ * Drop list items that contain only a marker and headings whose sections
+ * hold no content, then collapse the blank lines left behind.
+ *
+ * Section pruning is repeated until a pass removes nothing.
+ */
+function removeEmptyListItemsAndSections(input: string): string {
+  const isBareListMarker = (line: string): boolean => /^\s*[-*]\s*$/.test(line);
+  let current = input.split("\n").filter((line) => !isBareListMarker(line));
+
+  for (;;) {
+    const pruned = removeEmptySections(current);
+    const unchanged = pruned.length === current.length;
+    current = pruned;
+    if (unchanged) break;
+  }
+
+  const joined = current.join("\n");
+  return joined.replace(/\n{3,}/g, "\n\n").trim();
+}
+
+/**
+ * Remove heading lines whose section has no content.
+ *
+ * A section runs from a heading to the next heading of the same or a
+ * shallower level. It counts as non-empty when it contains at least one
+ * non-blank line that is not itself a heading (content of nested subsections
+ * counts). Non-heading lines are always kept.
+ */
+function removeEmptySections(lines: string[]): string[] {
+  return lines.filter((line, index) => {
+    const level = markdownSectionHeadingLevel(line);
+    if (level === null) return true;
+
+    for (let cursor = index + 1; cursor < lines.length; cursor++) {
+      const candidate = lines[cursor] ?? "";
+      const candidateLevel = markdownSectionHeadingLevel(candidate);
+      if (candidateLevel !== null) {
+        // A sibling or parent heading ends the section without content.
+        if (candidateLevel <= level) return false;
+        continue;
+      }
+      if (candidate.trim().length > 0) return true;
+    }
+    return false;
+  });
+}
+
+/**
+ * Heading level (2-6) of a markdown `##`..`######` heading line, or null
+ * when the trimmed line is not such a heading. Level-1 headings are not
+ * treated as section headings.
+ */
+function markdownSectionHeadingLevel(line: string): number | null {
+  const hashes = /^(#{2,6})\s+\S/.exec(line.trim())?.[1];
+  if (hashes === undefined) return null;
+  return hashes.length;
+}
+
+/**
+ * Replace every `{| ... |}` table span with a newline, repeating the
+ * replacement until the text stops changing.
+ */
+function stripWikiTables(input: string): string {
+  const tableSpan = /\{\|[\s\S]*?\|\}/g;
+  let current = input;
+  let stripped = current.replace(tableSpan, "\n");
+  while (stripped !== current) {
+    current = stripped;
+    stripped = current.replace(tableSpan, "\n");
+  }
+  return stripped;
+}
+
+/**
+ * Remove whole `[[Namespace:...]]` links, including nested links inside
+ * them, for the given namespaces.
+ *
+ * @param input Wikitext to scan.
+ * @param namespaces Namespace names to strip, compared case-insensitively.
+ * @returns The text with matching links removed; other links are untouched.
+ */
+function stripWikiLinksByNamespace(
+  input: string,
+  namespaces: string[],
+): string {
+  const namespaceSet = new Set(
+    namespaces.map((namespace) => namespace.toLowerCase()),
+  );
+  let output = "";
+  let index = 0;
+
+  while (index < input.length) {
+    const start = input.indexOf("[[", index);
+    if (start === -1) {
+      output += input.slice(index);
+      break;
+    }
+
+    // Only the first 80 characters after `[[` are inspected for an optional
+    // leading colon and an alphabetic namespace prefix.
+    const linkPrefix = /^\[\[\s*:?\s*([A-Za-z]+)\s*:/i.exec(
+      input.slice(start, start + 80),
+    );
+    if (!linkPrefix || !namespaceSet.has((linkPrefix[1] ?? "").toLowerCase())) {
+      // Not a link we strip: copy through the `[[` and keep scanning after it.
+      output += input.slice(index, start + 2);
+      index = start + 2;
+      continue;
+    }
+
+    output += input.slice(index, start);
+    // Skip to the matching `]]`, counting nested `[[`...`]]` pairs. An
+    // unterminated link consumes the rest of the input.
+    let depth = 1;
+    let cursor = start + 2;
+    while (cursor < input.length && depth > 0) {
+      if (input.startsWith("[[", cursor)) {
+        depth++;
+        cursor += 2;
+      } else if (input.startsWith("]]", cursor)) {
+        depth--;
+        cursor += 2;
+      } else {
+        cursor++;
+      }
+    }
+    index = cursor;
+  }
+
+  return output;
+}
+
+/**
+ * Remove `{{ ... }}` template spans, honouring nesting.
+ *
+ * Characters are kept only while no template is open. A `}}` seen outside
+ * any template is left in place, and an unclosed `{{` swallows the rest of
+ * the input.
+ */
+function stripBalancedTemplates(input: string): string {
+  const kept: string[] = [];
+  let depth = 0;
+  let cursor = 0;
+
+  while (cursor < input.length) {
+    if (input.startsWith("{{", cursor)) {
+      depth += 1;
+      cursor += 2;
+    } else if (depth > 0 && input.startsWith("}}", cursor)) {
+      depth -= 1;
+      cursor += 2;
+    } else {
+      if (depth === 0) kept.push(input[cursor] ?? "");
+      cursor += 1;
+    }
+  }
+
+  return kept.join("");
+}
+
+/**
+ * Build a MemoryCreateParams payload for one parsed article page.
+ *
+ * @param page Parsed page to convert.
+ * @param options Wiki identity, tree root, content mode, provenance fields
+ *   and the optional content byte budget.
+ * @returns The memory plus bookkeeping, or null when the page body is empty
+ *   after trimming (wikitext mode) or cleanup (plain mode).
+ */
+export function buildWikipediaMemory(
+  page: WikipediaPage,
+  options: WikipediaMemoryBuildOptions,
+): BuiltWikipediaMemory | null {
+  // Categories always come from the raw wikitext, regardless of content mode.
+  const categories = extractCategories(page.text);
+  const body =
+    options.contentMode === "wikitext"
+      ? page.text.trim()
+      : cleanWikitextToPlainText(page.text);
+  if (!body) return null;
+
+  // The title is prepended as a level-1 heading before any truncation.
+  const rawContent = `# ${page.title}\n\n${body}`;
+  const truncated = truncateUtf8(rawContent, options.maxContentBytes);
+  const content = truncated.text;
+  const articleSlug = normalizeSlug(page.title);
+  // The first category decides the tree path; pages without categories are
+  // filed under "Uncategorized".
+  const primaryCategory = categories[0] ?? "Uncategorized";
+  const primaryCategorySlug = normalizeSlug(primaryCategory);
+  const tree = `${options.treeRoot}.${primaryCategorySlug}`;
+  const sourceUrl = buildWikipediaArticleUrl(options.wikiSlug, page.title);
+  // Only set a temporal start when the revision timestamp parses as a date.
+  const temporalStart =
+    page.timestamp && !Number.isNaN(Date.parse(page.timestamp))
+      ? new Date(Date.parse(page.timestamp)).toISOString()
+      : undefined;
+
+  const meta: Record<string, unknown> = {
+    type: "wikipedia_article",
+    source: "wikipedia",
+    source_wiki: options.wikiSlug,
+    source_page_id: page.pageId,
+    source_revision_id: page.revisionId,
+    source_title: page.title,
+    source_namespace: page.namespace,
+    source_url: sourceUrl,
+    source_format: "mediawiki_xml",
+    content_format:
+      options.contentMode === "wikitext" ? "mediawiki_wikitext" : "plain_text",
+    categories,
+    primary_category: primaryCategory,
+    primary_category_slug: primaryCategorySlug,
+    article_slug: articleSlug,
+    imported_at: options.importedAt,
+    importer_version: WIKIPEDIA_IMPORTER_VERSION,
+  };
+
+  // Optional provenance fields are added only when a value is available.
+  if (options.sourceDumpPath) meta.source_dump_path = options.sourceDumpPath;
+  if (options.sourceDumpUrl) meta.source_dump_url = options.sourceDumpUrl;
+  if (options.sourceDumpDate) meta.source_dump_date = options.sourceDumpDate;
+  if (options.sourceDumpKind) meta.source_dump_kind = options.sourceDumpKind;
+  if (page.timestamp) meta.source_revision_timestamp = page.timestamp;
+  if (page.redirectTitle) meta.source_redirect_title = page.redirectTitle;
+  if (page.model) meta.source_model = page.model;
+  if (page.format) meta.source_text_format = page.format;
+  if (page.sha1) meta.source_revision_sha1 = page.sha1;
+  if (page.textBytes !== undefined) meta.source_text_bytes = page.textBytes;
+  if (truncated.truncated) meta.content_truncated = true;
+  if (options.maxContentBytes !== undefined) {
+    meta.max_content_bytes = options.maxContentBytes;
+  }
+
+  return {
+    memory: {
+      // Keyed on wiki + page id so the id does not change between revisions.
+      id: deterministicWikipediaPageUuidV7(options.wikiSlug, page.pageId),
+      content,
+      tree,
+      meta,
+      ...(temporalStart ? { temporal: { start: temporalStart } } : {}),
+    },
+    categories,
+    truncated: truncated.truncated,
+    contentBytes: Buffer.byteLength(content, "utf8"),
+    articleSlug,
+  };
+}
+
+/**
+ * Build the public article URL for a page title on the given wiki.
+ *
+ * Spaces in the title become underscores and the result is percent-encoded
+ * with `encodeURIComponent` (so `/` becomes `%2F`).
+ */
+export function buildWikipediaArticleUrl(
+  wikiSlug: string,
+  title: string,
+): string {
+  const underscoredTitle = title.split(" ").join("_");
+  return `https://${wikipediaHostFromWikiSlug(wikiSlug)}/wiki/${encodeURIComponent(underscoredTitle)}`;
+}
+
+/**
+ * Derive the Wikipedia hostname for a wiki database name.
+ *
+ * A trailing `wiki` is dropped (`enwiki` -> `en`), and underscores in the
+ * remaining language code are turned into hyphens, because database names
+ * use `_` where hostnames use `-` (`zh_min_nanwiki` ->
+ * `zh-min-nan.wikipedia.org`). Slugs without the suffix are used as-is.
+ *
+ * @param wikiSlug Wiki database name such as `enwiki` or `simplewiki`.
+ * @returns Hostname of the form `<project>.wikipedia.org`.
+ */
+export function wikipediaHostFromWikiSlug(wikiSlug: string): string {
+  const project = wikiSlug.endsWith("wiki") ? wikiSlug.slice(0, -4) : wikiSlug;
+  return `${project.replace(/_/g, "-")}.wikipedia.org`;
+}
+
+/**
+ * Cut a string so that, together with a truncation notice, it fits a UTF-8
+ * byte budget.
+ *
+ * The cut never lands between the two halves of a surrogate pair, so the
+ * result does not end its content with a broken character. Trailing
+ * whitespace before the notice is trimmed.
+ *
+ * @param input Text to limit.
+ * @param maxBytes Byte budget; undefined or <= 0 disables truncation.
+ * @returns The (possibly shortened) text and whether truncation happened.
+ *   When `maxBytes` is smaller than the notice itself, the result is the
+ *   notice alone and can exceed `maxBytes`.
+ */
+function truncateUtf8(
+  input: string,
+  maxBytes: number | undefined,
+): { text: string; truncated: boolean } {
+  if (maxBytes === undefined || maxBytes <= 0) {
+    return { text: input, truncated: false };
+  }
+  if (Buffer.byteLength(input, "utf8") <= maxBytes) {
+    return { text: input, truncated: false };
+  }
+
+  const suffix = "\n\n[Article truncated during Wikipedia import.]";
+  const suffixBytes = Buffer.byteLength(suffix, "utf8");
+  const contentBudget = Math.max(0, maxBytes - suffixBytes);
+
+  // Binary search for the longest prefix (in UTF-16 code units) whose UTF-8
+  // encoding fits the content budget.
+  let low = 0;
+  let high = input.length;
+  while (low < high) {
+    const mid = Math.ceil((low + high) / 2);
+    if (Buffer.byteLength(input.slice(0, mid), "utf8") <= contentBudget) {
+      low = mid;
+    } else {
+      high = mid - 1;
+    }
+  }
+
+  // The search works on code units, so it can stop between a high and a low
+  // surrogate. Back off by one to drop the orphaned high surrogate.
+  if (low > 0 && low < input.length) {
+    const previousUnit = input.charCodeAt(low - 1);
+    const nextUnit = input.charCodeAt(low);
+    const splitsPair =
+      previousUnit >= 0xd800 &&
+      previousUnit <= 0xdbff &&
+      nextUnit >= 0xdc00 &&
+      nextUnit <= 0xdfff;
+    if (splitsPair) low -= 1;
+  }
+
+  return {
+    text: `${input.slice(0, low).trimEnd()}${suffix}`,
+    truncated: true,
+  };
+}
+
+/**
+ * Derive a repeatable UUIDv7-shaped id from a wiki slug and page id.
+ *
+ * The 48-bit timestamp field is a fixed constant and the remaining bits come
+ * from a SHA-256 digest of `wikipedia:<wikiSlug>:<pageId>`. Because the
+ * revision id is not part of the key, importing a newer dump maps the same
+ * article to the same memory id; the revision id stays in metadata.
+ *
+ * @param wikiSlug Wiki database name.
+ * @param pageId Page id within that wiki.
+ * @returns Lower-case UUID string with version 7 and the RFC variant bits.
+ */
+export function deterministicWikipediaPageUuidV7(
+  wikiSlug: string,
+  pageId: string,
+): string {
+  const bytes = new Uint8Array(16);
+
+  // Bytes 0-5: the timestamp as a 48-bit big-endian integer, filled from the
+  // least significant byte upwards.
+  let remaining = WIKIPEDIA_LAUNCH_TIMESTAMP_MS;
+  for (let position = 5; position >= 0; position--) {
+    bytes[position] = remaining % 256;
+    remaining = Math.floor(remaining / 256);
+  }
+
+  const digest = createHash("sha256")
+    .update(`wikipedia:${wikiSlug}:${pageId}`, "utf8")
+    .digest();
+
+  // Byte 6: version nibble 7 plus the low nibble of the first digest byte.
+  bytes[6] = 0x70 | ((digest[0] ?? 0) & 0x0f);
+  bytes[7] = digest[1] ?? 0;
+  // Byte 8: variant bits 10 plus six digest bits.
+  bytes[8] = 0x80 | ((digest[2] ?? 0) & 0x3f);
+  // Bytes 9-15: the next seven digest bytes.
+  bytes.set(digest.subarray(3, 10), 9);
+
+  return bytesToUuid(bytes);
+}
+
+/**
+ * Format the first 16 bytes as a lower-case 8-4-4-4-12 UUID string.
+ * Missing bytes are rendered as `00`.
+ */
+function bytesToUuid(bytes: Uint8Array): string {
+  const hex = Array.from({ length: 16 }, (_unused, position) =>
+    (bytes[position] ?? 0).toString(16).padStart(2, "0"),
+  ).join("");
+
+  const groups = [
+    hex.slice(0, 8),
+    hex.slice(8, 12),
+    hex.slice(12, 16),
+    hex.slice(16, 20),
+    hex.slice(20, 32),
+  ];
+  return groups.join("-");
+}