From 701c59ea2e19ec2af54711ab86964ef12bafdb42 Mon Sep 17 00:00:00 2001 From: CahidArda Date: Mon, 25 May 2026 11:00:05 +0300 Subject: [PATCH 1/4] docs: add agent memory tutorial using Redis Search --- docs.json | 59 ++-- redis/tutorials/agent_memory.mdx | 460 +++++++++++++++++++++++++++++++ 2 files changed, 490 insertions(+), 29 deletions(-) create mode 100644 redis/tutorials/agent_memory.mdx diff --git a/docs.json b/docs.json index 079d5528..920c86ab 100644 --- a/docs.json +++ b/docs.json @@ -132,22 +132,20 @@ ] }, { - "group": "Features", + "group": "How To", "pages": [ - "redis/features/globaldatabase", - "redis/features/restapi", - "redis/features/backup", - "redis/features/durability", - "redis/features/replication", - "redis/features/key-locking", - "redis/features/eviction", - "redis/features/security", - "redis/features/credential-protection", - "redis/features/consistency", - "redis/features/auto-upgrade" + "redis/howto/connect-client", + "redis/howto/connect-with-upstash-redis", + "redis/howto/upgrade-database", + "redis/howto/metrics-and-charts", + "redis/howto/monitoryourusage", + "redis/howto/importexport", + "redis/howto/keyspacenotifications", + "redis/howto/ipallowlist", + "redis/howto/readyourwrites", + "redis/howto/migratefromregionaltoglobal" ] - }, - { + }, { "group": "SDKs", "pages": [ { @@ -726,11 +724,19 @@ ] }, { - "group": "Security & Compliance", + "group": "Features", "pages": [ - "redis/help/production-checklist", - "redis/help/shared-responsibility-model", - "redis/help/managing-healthcare-data" + "redis/features/globaldatabase", + "redis/features/restapi", + "redis/features/backup", + "redis/features/durability", + "redis/features/replication", + "redis/features/key-locking", + "redis/features/eviction", + "redis/features/security", + "redis/features/credential-protection", + "redis/features/consistency", + "redis/features/auto-upgrade" ] }, { @@ -825,19 +831,13 @@ } ] }, + { - "group": "How To", + "group": "Security & Compliance", 
"pages": [ - "redis/howto/connect-client", - "redis/howto/connect-with-upstash-redis", - "redis/howto/upgrade-database", - "redis/howto/metrics-and-charts", - "redis/howto/monitoryourusage", - "redis/howto/importexport", - "redis/howto/keyspacenotifications", - "redis/howto/ipallowlist", - "redis/howto/readyourwrites", - "redis/howto/migratefromregionaltoglobal" + "redis/help/production-checklist", + "redis/help/shared-responsibility-model", + "redis/help/managing-healthcare-data" ] }, { @@ -867,6 +867,7 @@ "redis/tutorials/python_fastapi_caching", "redis/tutorials/python_url_shortener", "redis/tutorials/api_with_cdk", + "redis/tutorials/agent_memory", "redis/tutorials/auto_complete_with_serverless_redis", "redis/tutorials/aws_app_runner_with_redis", "redis/tutorials/cloud_run_sessions", diff --git a/redis/tutorials/agent_memory.mdx b/redis/tutorials/agent_memory.mdx new file mode 100644 index 00000000..91d807d0 --- /dev/null +++ b/redis/tutorials/agent_memory.mdx @@ -0,0 +1,460 @@ +--- +title: Agent Memory with Redis Search +description: Build short-term and long-term memory for AI agents on Upstash Redis. Store working memory with TTLs and recall long-term memories with Redis Search full-text queries. +--- + +Large language models are stateless: once a request returns, the model forgets +everything. To build an agent that remembers who a user is and what happened in +past conversations, you need to store that context yourself and feed it back into +the prompt. + +In this tutorial we build a small but complete **agent memory** layer on Upstash +Redis, with two tiers: + +- **Working memory** — the running conversation for the current session, stored + in a single Redis key with a TTL so it expires on its own. +- **Long-term memory** — durable facts about the user (preferences, events, + decisions) stored as JSON documents and recalled with [Redis Search](/redis/search/introduction) + full-text queries. 
+ +On every turn the agent **recalls** relevant long-term memories, answers using +those plus the recent conversation, then **remembers** any new facts worth keeping. + + +This tutorial uses OpenAI for the chat and fact-extraction calls, but the memory +layer itself is model-agnostic — swap in any LLM you like. + + +## Prerequisites + +- An [Upstash Redis](https://console.upstash.com) database (the REST URL and token). +- An OpenAI API key. + +Install the dependencies: + + + + +```bash +npm install @upstash/redis openai +``` + + + +```bash +pip install upstash-redis openai +``` + + + + +Set your environment variables: + +```bash +UPSTASH_REDIS_REST_URL="https://..." +UPSTASH_REDIS_REST_TOKEN="..." +OPENAI_API_KEY="sk-..." +``` + +## Step 1: Create the long-term memory index + +Long-term memories are JSON documents stored under the `memory:` prefix. We index +the `text` field for full-text recall, and keep `userId` and `kind` as exact-match +keywords so we can scope a search to a single user. `createdAt` is a sortable +number we can use to favor recent memories. + +Create the index **once** (e.g. in a setup script) — not on every request. + + + + +```ts +// setup.ts +import { Redis, s } from "@upstash/redis"; + +const redis = Redis.fromEnv(); + +await redis.search.createIndex({ + name: "memories", + dataType: "json", + prefix: "memory:", + schema: s.object({ + text: s.string(), // full-text searchable fact + userId: s.keyword(), // exact-match owner + kind: s.keyword(), // "preference" | "event" | "fact" ... + createdAt: s.number(), // epoch ms, sortable + }), +}); +``` + + + +```python +# setup.py +from upstash_redis import Redis + +redis = Redis.from_env() + +redis.search.create_index( + name="memories", + data_type="json", + prefix="memory:", + schema={ + "text": "TEXT", # full-text searchable fact + "userId": "KEYWORD", # exact-match owner + "kind": "KEYWORD", # "preference" | "event" | "fact" ... 
+ "createdAt": "F64", # epoch ms, sortable + }, +) +``` + + + + +## Step 2: Working (short-term) memory + +Working memory is just the recent message history for a session. We store it as a +single JSON value with a one-hour TTL and cap it to the last 20 messages so the +prompt stays small. When the session goes quiet, Redis expires the key for us. + + + + +```ts +// memory.ts +import { Redis } from "@upstash/redis"; + +const redis = Redis.fromEnv(); + +export type Message = { role: "user" | "assistant"; content: string }; + +const SESSION_TTL = 60 * 60; // 1 hour +const MAX_MESSAGES = 20; + +export async function loadHistory(sessionId: string): Promise<Message[]> { + return (await redis.get(`chat:${sessionId}`)) ?? []; +} + +export async function saveHistory(sessionId: string, messages: Message[]) { + const trimmed = messages.slice(-MAX_MESSAGES); + await redis.set(`chat:${sessionId}`, trimmed, { ex: SESSION_TTL }); +} +``` + + + +```python +# memory.py +import json +from upstash_redis import Redis + +redis = Redis.from_env() + +SESSION_TTL = 60 * 60 # 1 hour +MAX_MESSAGES = 20 + + +def load_history(session_id: str) -> list[dict]: + raw = redis.get(f"chat:{session_id}") + return json.loads(raw) if raw else [] + + +def save_history(session_id: str, messages: list[dict]) -> None: + trimmed = messages[-MAX_MESSAGES:] + redis.set(f"chat:{session_id}", json.dumps(trimmed), ex=SESSION_TTL) +``` + + + + +## Step 3: Recall relevant memories + +To answer well, the agent needs the long-term facts that relate to the current +message. We run a full-text query against the `memories` index, scoped to the +user with the `userId` keyword. Redis Search ranks matches by relevance, so we +take the top few. 
+ + + +```ts +const memories = redis.search.index({ name: "memories" }); + +export async function recall( + userId: string, + query: string, + limit = 5, +): Promise<string[]> { + const results = await memories.query({ + filter: { text: query, userId }, + limit, + }); + + // No memories yet → the index may not exist → results is null + return (results ?? []).map((r) => r.data.text as string); +} +``` + + + +```python +memories = redis.search.index(name="memories") + + +def recall(user_id: str, query: str, limit: int = 5) -> list[str]: + results = memories.query(filter={"text": query, "userId": user_id}, limit=limit) + + # No memories yet → the index may not exist → results is None + return [r.data["text"] for r in (results or [])] +``` + + + + + +To bias recall toward recent memories, you can boost the score with the +`createdAt` field using a [score function](/redis/search/querying#4-score-function), +or sort with `orderBy` / `order_by`. We keep plain relevance ranking here for +simplicity. + + +## Step 4: Remember new facts + +After each exchange we ask the model to pull out durable facts — things worth +remembering across sessions, not small talk. Each fact becomes a JSON document +under the `memory:` prefix, so the index picks it up automatically. + +Because full-text search gives us a cheap similarity check, we **deduplicate** +before writing: if a very similar memory already exists for this user, we skip it. + + + + +```ts +import OpenAI from "openai"; + +const openai = new OpenAI(); + +// Heuristic: full-text scores are unbounded, so this threshold is tuned by feel. 
+const DEDUPE_SCORE = 8; + +async function alreadyKnown(userId: string, text: string): Promise<boolean> { + const hits = await memories.query({ filter: { text, userId }, limit: 1 }); + return !!hits?.length && hits[0].score > DEDUPE_SCORE; +} + +export async function remember(userId: string, conversation: Message[]) { + const completion = await openai.chat.completions.create({ + model: "gpt-4o-mini", + response_format: { type: "json_object" }, + messages: [ + { + role: "system", + content: + "Extract durable facts about the user worth remembering across " + + "sessions (preferences, decisions, personal details). Ignore " + + 'small talk. Respond as JSON: {"facts": ["..."]}. Empty if none.', + }, + { role: "user", content: JSON.stringify(conversation) }, + ], + }); + + const { facts } = JSON.parse(completion.choices[0].message.content ?? '{"facts":[]}'); + + for (const text of facts as string[]) { + if (await alreadyKnown(userId, text)) continue; + const id = crypto.randomUUID(); + await redis.json.set(`memory:${userId}:${id}`, "$", { + text, + userId, + kind: "fact", + createdAt: Date.now(), + }); + } +} +``` + + + +```python +import json +import uuid +import time +from openai import OpenAI + +openai = OpenAI() + +# Heuristic: full-text scores are unbounded, so this threshold is tuned by feel. +DEDUPE_SCORE = 8 + + +def already_known(user_id: str, text: str) -> bool: + hits = memories.query(filter={"text": text, "userId": user_id}, limit=1) + return bool(hits) and hits[0].score > DEDUPE_SCORE + + +def remember(user_id: str, conversation: list[dict]) -> None: + completion = openai.chat.completions.create( + model="gpt-4o-mini", + response_format={"type": "json_object"}, + messages=[ + { + "role": "system", + "content": ( + "Extract durable facts about the user worth remembering " + "across sessions (preferences, decisions, personal details). " + "Ignore small talk. Respond as JSON: {\"facts\": [\"...\"]}. " + "Empty if none." 
+ ), + }, + {"role": "user", "content": json.dumps(conversation)}, + ], + ) + + facts = json.loads(completion.choices[0].message.content or '{"facts":[]}')["facts"] + + for text in facts: + if already_known(user_id, text): + continue + memory_id = uuid.uuid4().hex + redis.json().set( + f"memory:{user_id}:{memory_id}", + "$", + { + "text": text, + "userId": user_id, + "kind": "fact", + "createdAt": int(time.time() * 1000), + }, + ) +``` + + + + +## Step 5: The chat loop + +Now we wire it together. Each turn: **recall** relevant memories, build a prompt +from those plus the working memory, call the model, persist the updated history, +and **remember** new facts. + + + + +```ts +export async function chat(userId: string, sessionId: string, input: string) { + const [history, recalled] = await Promise.all([ + loadHistory(sessionId), + recall(userId, input), + ]); + + const system = + "You are a helpful assistant. Use the following remembered facts about " + + `the user when relevant:\n${recalled.map((m) => `- ${m}`).join("\n") || "(none yet)"}`; + + const completion = await openai.chat.completions.create({ + model: "gpt-4o-mini", + messages: [ + { role: "system", content: system }, + ...history, + { role: "user", content: input }, + ], + }); + + const reply = completion.choices[0].message.content ?? ""; + + const updated: Message[] = [ + ...history, + { role: "user", content: input }, + { role: "assistant", content: reply }, + ]; + + await saveHistory(sessionId, updated); + await remember(userId, updated); // fire-and-forget in production + + return reply; +} +``` + + + +```python +def chat(user_id: str, session_id: str, user_input: str) -> str: + history = load_history(session_id) + recalled = recall(user_id, user_input) + + facts = "\n".join(f"- {m}" for m in recalled) or "(none yet)" + system = ( + "You are a helpful assistant. 
Use the following remembered facts " + f"about the user when relevant:\n{facts}" + ) + + completion = openai.chat.completions.create( + model="gpt-4o-mini", + messages=[ + {"role": "system", "content": system}, + *history, + {"role": "user", "content": user_input}, + ], + ) + + reply = completion.choices[0].message.content or "" + + updated = history + [ + {"role": "user", "content": user_input}, + {"role": "assistant", "content": reply}, + ] + + save_history(session_id, updated) + remember(user_id, updated) # run in the background in production + + return reply +``` + + + + +## Try it + +Run two sessions for the same user. Even after the first session's working memory +expires, the facts learned there are recalled in the second: + + + + +```ts +await chat("user-1", "session-a", "I'm vegetarian and I love spicy food."); +// ...later, a brand new session... +const reply = await chat("user-1", "session-b", "Suggest a dinner for me."); +console.log(reply); // recalls "vegetarian" + "spicy" from long-term memory +``` + + + +```python +chat("user-1", "session-a", "I'm vegetarian and I love spicy food.") +# ...later, a brand new session... +reply = chat("user-1", "session-b", "Suggest a dinner for me.") +print(reply) # recalls "vegetarian" + "spicy" from long-term memory +``` + + + + +## How it fits together + +- **Working memory** lives under `chat:{sessionId}` with a TTL — fast to read, + self-expiring, scoped to one conversation. +- **Long-term memory** lives under `memory:{userId}:{id}` and is searchable across + sessions through the `memories` index. +- **Recall** uses full-text relevance to surface the facts that matter for the + current message; **remember** extracts and deduplicates new ones. + +## Next steps + +- Add a `kind` such as `"preference"` vs `"event"` and filter recall by it. +- Boost recent memories with a [score function](/redis/search/querying#4-score-function). +- Summarize older working-memory messages instead of dropping them. 
+- Stream the reply to a chat UI and animate it smoothly — see + [Smooth Text Streaming in AI SDK v5](https://upstash.com/blog/smooth-streaming). +- Learn more about what Redis Search can do in the [Search docs](/redis/search/introduction). From 6e3171de0d12aaf346f3d787d8957a79d8a5a721 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 25 May 2026 08:00:43 +0000 Subject: [PATCH 2/4] chore(llms): regenerate llms.txt and llms-full.txt --- llms-full.txt | 457 +++++++++++++++++++++++++++++++++++++++++++++++++- llms.txt | 1 + 2 files changed, 457 insertions(+), 1 deletion(-) diff --git a/llms-full.txt b/llms-full.txt index fd95be98..c348095c 100644 --- a/llms-full.txt +++ b/llms-full.txt @@ -18765,7 +18765,8 @@ Please check our [pricing page](https://upstash.com/pricing/redis) for the most Upstash Redis has two paid plans: -* **Pay-As-You-Go (PAYG).** Billed per request. Best for variable or low-volume workloads where command count is unpredictable, such as caching in serverless functions, edge workloads, and apps that scale to zero. A monthly ceiling price caps your maximum cost. +* **Pay-As-You-Go (PAYG).** Billed per request. Best for variable or low-volume workloads where command volume is unpredictable, such as caching in serverless functions, edge workloads, and apps that scale to zero. To help prevent unexpected charges, it's possible to set a monthly budget on pay-as-you-go plans. + * **Fixed plans.** A flat monthly price with a cap on throughput and data size. Usually cheaper than PAYG for sustained, high-throughput workloads where command count is consistently high and predictable. Worker-heavy setups such as Sidekiq, BullMQ, Celery, and cron jobs are a common fit. You can start on PAYG and switch to a Fixed plan later, or vice versa. See [all plans and limits](https://upstash.com/pricing) for the full breakdown. 
@@ -28264,6 +28265,460 @@ messages that are delivered to a consumer but not yet acknowledged via Acknowledge the consumed messages via [XACK](https://redis.io/commands/xack/) from the list of the associated group and consumer. +# Agent Memory with Redis Search +Source: https://upstash.com/docs/redis/tutorials/agent_memory + +Large language models are stateless: once a request returns, the model forgets +everything. To build an agent that remembers who a user is and what happened in +past conversations, you need to store that context yourself and feed it back into +the prompt. + +In this tutorial we build a small but complete **agent memory** layer on Upstash +Redis, with two tiers: + +* **Working memory** — the running conversation for the current session, stored + in a single Redis key with a TTL so it expires on its own. +* **Long-term memory** — durable facts about the user (preferences, events, + decisions) stored as JSON documents and recalled with [Redis Search](/docs/redis/search/introduction) + full-text queries. + +On every turn the agent **recalls** relevant long-term memories, answers using +those plus the recent conversation, then **remembers** any new facts worth keeping. + + +This tutorial uses OpenAI for the chat and fact-extraction calls, but the memory +layer itself is model-agnostic — swap in any LLM you like. + + +## Prerequisites + +* An [Upstash Redis](https://console.upstash.com) database (the REST URL and token). +* An OpenAI API key. + +Install the dependencies: + + + + +```bash +npm install @upstash/redis openai +``` + + + +```bash +pip install upstash-redis openai +``` + + + + +Set your environment variables: + +```bash +UPSTASH_REDIS_REST_URL="https://..." +UPSTASH_REDIS_REST_TOKEN="..." +OPENAI_API_KEY="sk-..." +``` + +## Step 1: Create the long-term memory index + +Long-term memories are JSON documents stored under the `memory:` prefix. 
We index +the `text` field for full-text recall, and keep `userId` and `kind` as exact-match +keywords so we can scope a search to a single user. `createdAt` is a sortable +number we can use to favor recent memories. + +Create the index **once** (e.g. in a setup script) — not on every request. + + + + +```ts +// setup.ts +import { Redis, s } from "@upstash/redis"; + +const redis = Redis.fromEnv(); + +await redis.search.createIndex({ + name: "memories", + dataType: "json", + prefix: "memory:", + schema: s.object({ + text: s.string(), // full-text searchable fact + userId: s.keyword(), // exact-match owner + kind: s.keyword(), // "preference" | "event" | "fact" ... + createdAt: s.number(), // epoch ms, sortable + }), +}); +``` + + + +```python +# setup.py +from upstash_redis import Redis + +redis = Redis.from_env() + +redis.search.create_index( + name="memories", + data_type="json", + prefix="memory:", + schema={ + "text": "TEXT", # full-text searchable fact + "userId": "KEYWORD", # exact-match owner + "kind": "KEYWORD", # "preference" | "event" | "fact" ... + "createdAt": "F64", # epoch ms, sortable + }, +) +``` + + + + +## Step 2: Working (short-term) memory + +Working memory is just the recent message history for a session. We store it as a +single JSON value with a one-hour TTL and cap it to the last 20 messages so the +prompt stays small. When the session goes quiet, Redis expires the key for us. + + + + +```ts +// memory.ts +import { Redis } from "@upstash/redis"; + +const redis = Redis.fromEnv(); + +export type Message = { role: "user" | "assistant"; content: string }; + +const SESSION_TTL = 60 * 60; // 1 hour +const MAX_MESSAGES = 20; + +export async function loadHistory(sessionId: string): Promise { + return (await redis.get(`chat:${sessionId}`)) ?? 
[]; +} + +export async function saveHistory(sessionId: string, messages: Message[]) { + const trimmed = messages.slice(-MAX_MESSAGES); + await redis.set(`chat:${sessionId}`, trimmed, { ex: SESSION_TTL }); +} +``` + + + +```python +# memory.py +import json +from upstash_redis import Redis + +redis = Redis.from_env() + +SESSION_TTL = 60 * 60 # 1 hour +MAX_MESSAGES = 20 + +def load_history(session_id: str) -> list[dict]: + raw = redis.get(f"chat:{session_id}") + return json.loads(raw) if raw else [] + +def save_history(session_id: str, messages: list[dict]) -> None: + trimmed = messages[-MAX_MESSAGES:] + redis.set(f"chat:{session_id}", json.dumps(trimmed), ex=SESSION_TTL) +``` + + + + +## Step 3: Recall relevant memories + +To answer well, the agent needs the long-term facts that relate to the current +message. We run a full-text query against the `memories` index, scoped to the +user with the `userId` keyword. Redis Search ranks matches by relevance, so we +take the top few. + + + + +```ts +const memories = redis.search.index({ name: "memories" }); + +export async function recall( + userId: string, + query: string, + limit = 5, +): Promise { + const results = await memories.query({ + filter: { text: query, userId }, + limit, + }); + + // No memories yet → the index may not exist → results is null + return (results ?? []).map((r) => r.data.text as string); +} +``` + + + +```python +memories = redis.search.index(name="memories") + +def recall(user_id: str, query: str, limit: int = 5) -> list[str]: + results = memories.query(filter={"text": query, "userId": user_id}, limit=limit) + + # No memories yet → the index may not exist → results is None + return [r.data["text"] for r in (results or [])] +``` + + + + + +To bias recall toward recent memories, you can boost the score with the +`createdAt` field using a [score function](/docs/redis/search/querying#4-score-function), +or sort with `orderBy` / `order_by`. We keep plain relevance ranking here for +simplicity. 
+ + +## Step 4: Remember new facts + +After each exchange we ask the model to pull out durable facts — things worth +remembering across sessions, not small talk. Each fact becomes a JSON document +under the `memory:` prefix, so the index picks it up automatically. + +Because full-text search gives us a cheap similarity check, we **deduplicate** +before writing: if a very similar memory already exists for this user, we skip it. + + + + +```ts +import OpenAI from "openai"; + +const openai = new OpenAI(); + +// Heuristic: full-text scores are unbounded, so this threshold is tuned by feel. +const DEDUPE_SCORE = 8; + +async function alreadyKnown(userId: string, text: string): Promise { + const hits = await memories.query({ filter: { text, userId }, limit: 1 }); + return !!hits?.length && hits[0].score > DEDUPE_SCORE; +} + +export async function remember(userId: string, conversation: Message[]) { + const completion = await openai.chat.completions.create({ + model: "gpt-4o-mini", + response_format: { type: "json_object" }, + messages: [ + { + role: "system", + content: + "Extract durable facts about the user worth remembering across " + + "sessions (preferences, decisions, personal details). Ignore " + + 'small talk. Respond as JSON: {"facts": ["..."]}. Empty if none.', + }, + { role: "user", content: JSON.stringify(conversation) }, + ], + }); + + const { facts } = JSON.parse(completion.choices[0].message.content ?? '{"facts":[]}'); + + for (const text of facts as string[]) { + if (await alreadyKnown(userId, text)) continue; + const id = crypto.randomUUID(); + await redis.json.set(`memory:${userId}:${id}`, "$", { + text, + userId, + kind: "fact", + createdAt: Date.now(), + }); + } +} +``` + + + +```python +import json +import uuid +import time +from openai import OpenAI + +openai = OpenAI() + +# Heuristic: full-text scores are unbounded, so this threshold is tuned by feel. 
+DEDUPE_SCORE = 8 + +def already_known(user_id: str, text: str) -> bool: + hits = memories.query(filter={"text": text, "userId": user_id}, limit=1) + return bool(hits) and hits[0].score > DEDUPE_SCORE + +def remember(user_id: str, conversation: list[dict]) -> None: + completion = openai.chat.completions.create( + model="gpt-4o-mini", + response_format={"type": "json_object"}, + messages=[ + { + "role": "system", + "content": ( + "Extract durable facts about the user worth remembering " + "across sessions (preferences, decisions, personal details). " + "Ignore small talk. Respond as JSON: {\"facts\": [\"...\"]}. " + "Empty if none." + ), + }, + {"role": "user", "content": json.dumps(conversation)}, + ], + ) + + facts = json.loads(completion.choices[0].message.content or '{"facts":[]}')["facts"] + + for text in facts: + if already_known(user_id, text): + continue + memory_id = uuid.uuid4().hex + redis.json().set( + f"memory:{user_id}:{memory_id}", + "$", + { + "text": text, + "userId": user_id, + "kind": "fact", + "createdAt": int(time.time() * 1000), + }, + ) +``` + + + + +## Step 5: The chat loop + +Now we wire it together. Each turn: **recall** relevant memories, build a prompt +from those plus the working memory, call the model, persist the updated history, +and **remember** new facts. + + + + +```ts +export async function chat(userId: string, sessionId: string, input: string) { + const [history, recalled] = await Promise.all([ + loadHistory(sessionId), + recall(userId, input), + ]); + + const system = + "You are a helpful assistant. Use the following remembered facts about " + + `the user when relevant:\n${recalled.map((m) => `- ${m}`).join("\n") || "(none yet)"}`; + + const completion = await openai.chat.completions.create({ + model: "gpt-4o-mini", + messages: [ + { role: "system", content: system }, + ...history, + { role: "user", content: input }, + ], + }); + + const reply = completion.choices[0].message.content ?? 
""; + + const updated: Message[] = [ + ...history, + { role: "user", content: input }, + { role: "assistant", content: reply }, + ]; + + await saveHistory(sessionId, updated); + await remember(userId, updated); // fire-and-forget in production + + return reply; +} +``` + + + +```python +def chat(user_id: str, session_id: str, user_input: str) -> str: + history = load_history(session_id) + recalled = recall(user_id, user_input) + + facts = "\n".join(f"- {m}" for m in recalled) or "(none yet)" + system = ( + "You are a helpful assistant. Use the following remembered facts " + f"about the user when relevant:\n{facts}" + ) + + completion = openai.chat.completions.create( + model="gpt-4o-mini", + messages=[ + {"role": "system", "content": system}, + *history, + {"role": "user", "content": user_input}, + ], + ) + + reply = completion.choices[0].message.content or "" + + updated = history + [ + {"role": "user", "content": user_input}, + {"role": "assistant", "content": reply}, + ] + + save_history(session_id, updated) + remember(user_id, updated) # run in the background in production + + return reply +``` + + + + +## Try it + +Run two sessions for the same user. Even after the first session's working memory +expires, the facts learned there are recalled in the second: + + + + +```ts +await chat("user-1", "session-a", "I'm vegetarian and I love spicy food."); +// ...later, a brand new session... +const reply = await chat("user-1", "session-b", "Suggest a dinner for me."); +console.log(reply); // recalls "vegetarian" + "spicy" from long-term memory +``` + + + +```python +chat("user-1", "session-a", "I'm vegetarian and I love spicy food.") +# ...later, a brand new session... +reply = chat("user-1", "session-b", "Suggest a dinner for me.") +print(reply) # recalls "vegetarian" + "spicy" from long-term memory +``` + + + + +## How it fits together + +* **Working memory** lives under `chat:{sessionId}` with a TTL — fast to read, + self-expiring, scoped to one conversation. 
+* **Long-term memory** lives under `memory:{userId}:{id}` and is searchable across + sessions through the `memories` index. +* **Recall** uses full-text relevance to surface the facts that matter for the + current message; **remember** extracts and deduplicates new ones. + +## Next steps + +* Add a `kind` such as `"preference"` vs `"event"` and filter recall by it. +* Boost recent memories with a [score function](/docs/redis/search/querying#4-score-function). +* Summarize older working-memory messages instead of dropping them. +* Stream the reply to a chat UI and animate it smoothly — see + [Smooth Text Streaming in AI SDK v5](https://upstash.com/blog/smooth-streaming). +* Learn more about what Redis Search can do in the [Search docs](/docs/redis/search/introduction). + # Deploy a Serverless API with AWS CDK and AWS Lambda Source: https://upstash.com/docs/redis/tutorials/api_with_cdk diff --git a/llms.txt b/llms.txt index 8d415089..15a1b40a 100644 --- a/llms.txt +++ b/llms.txt @@ -787,6 +787,7 @@ - [NOAUTH Authentication Required](https://upstash.com/docs/redis/troubleshooting/no_auth.md) - [Connecting with Read-Only Access](https://upstash.com/docs/redis/troubleshooting/readonly_connection.md) - [ERR XReadGroup is cancelled](https://upstash.com/docs/redis/troubleshooting/stream_pel_limit.md) +- [Agent Memory with Redis Search](https://upstash.com/docs/redis/tutorials/agent_memory.md): Build short-term and long-term memory for AI agents on Upstash Redis. Store working memory with TTLs and recall long-term memories with Redis Search full-text queries. 
- [Deploy a Serverless API with AWS CDK and AWS Lambda](https://upstash.com/docs/redis/tutorials/api_with_cdk.md) - [Autocomplete API with Serverless Redis](https://upstash.com/docs/redis/tutorials/auto_complete_with_serverless_redis.md) - [Build Stateful Applications with AWS App Runner and Serverless Redis](https://upstash.com/docs/redis/tutorials/aws_app_runner_with_redis.md): This tutorial shows how to create a serverless and stateful application using AWS App Runner and Redis From 31a353ed42ccbe51e5d211293d8e7dc8adeed13f Mon Sep 17 00:00:00 2001 From: CahidArda Date: Mon, 25 May 2026 13:43:00 +0300 Subject: [PATCH 3/4] docs: fix Python SDK signatures and indexing notes in search docs and agent memory tutorial --- redis/search/getting-started.mdx | 21 +++++++++-- redis/tutorials/agent_memory.mdx | 65 +++++++++++++++++++++----------- 2 files changed, 61 insertions(+), 25 deletions(-) diff --git a/redis/search/getting-started.mdx b/redis/search/getting-started.mdx index b3cd1787..441b439e 100644 --- a/redis/search/getting-started.mdx +++ b/redis/search/getting-started.mdx @@ -36,7 +36,7 @@ redis = Redis.from_env() index = redis.search.create_index( name="products", data_type="json", - prefix="product:", + prefixes="product:", schema={ "name": "TEXT", "description": "TEXT", @@ -56,10 +56,23 @@ SEARCH.CREATE products ON JSON PREFIX 1 product: SCHEMA name TEXT description TE + +Create an index once, not on every request. `createIndex` throws if an index with +the same name already exists. To make setup safely re-runnable, pass `exists_ok=True` +in the Python SDK, or wrap the call in a `try/catch` in TypeScript. + + ## 2. Add Data Add data using standard Redis JSON commands. Any key matching the index prefix will be automatically indexed. + +Writes are indexed asynchronously: a `JSON.SET` returns before the document is +searchable. For demos and tests, call `waitIndexing()` / `wait_indexing()` to +block until pending updates are applied. 
In production, queries running on a later +request will normally hit an up-to-date index without waiting. + + @@ -80,12 +93,14 @@ await redis.json.set("product:2", "$", { price: 129.99, inStock: true, }); + +await index.waitIndexing(); ``` ```python -redis.json().set("product:1", "$", { +redis.json.set("product:1", "$", { "name": "Wireless Headphones", "description": "Premium noise-cancelling wireless headphones with 30-hour battery life", "category": "electronics", @@ -93,7 +108,7 @@ redis.json().set("product:1", "$", { "inStock": True, }) -redis.json().set("product:2", "$", { +redis.json.set("product:2", "$", { "name": "Running Shoes", "description": "Lightweight running shoes with advanced cushioning technology", "category": "sports", diff --git a/redis/tutorials/agent_memory.mdx b/redis/tutorials/agent_memory.mdx index 91d807d0..5c9635fe 100644 --- a/redis/tutorials/agent_memory.mdx +++ b/redis/tutorials/agent_memory.mdx @@ -11,9 +11,9 @@ the prompt. In this tutorial we build a small but complete **agent memory** layer on Upstash Redis, with two tiers: -- **Working memory** — the running conversation for the current session, stored +- **Working memory**: the running conversation for the current session, stored in a single Redis key with a TTL so it expires on its own. -- **Long-term memory** — durable facts about the user (preferences, events, +- **Long-term memory**: durable facts about the user (preferences, events, decisions) stored as JSON documents and recalled with [Redis Search](/redis/search/introduction) full-text queries. @@ -22,7 +22,7 @@ those plus the recent conversation, then **remembers** any new facts worth keepi This tutorial uses OpenAI for the chat and fact-extraction calls, but the memory -layer itself is model-agnostic — swap in any LLM you like. +layer itself is model-agnostic, so swap in any LLM you like. 
## Prerequisites @@ -63,7 +63,7 @@ the `text` field for full-text recall, and keep `userId` and `kind` as exact-mat keywords so we can scope a search to a single user. `createdAt` is a sortable number we can use to favor recent memories. -Create the index **once** (e.g. in a setup script) — not on every request. +Create the index **once** (e.g. in a setup script), not on every request. @@ -74,17 +74,21 @@ import { Redis, s } from "@upstash/redis"; const redis = Redis.fromEnv(); -await redis.search.createIndex({ - name: "memories", - dataType: "json", - prefix: "memory:", - schema: s.object({ - text: s.string(), // full-text searchable fact - userId: s.keyword(), // exact-match owner - kind: s.keyword(), // "preference" | "event" | "fact" ... - createdAt: s.number(), // epoch ms, sortable - }), -}); +try { + await redis.search.createIndex({ + name: "memories", + dataType: "json", + prefix: "memory:", + schema: s.object({ + text: s.string(), // full-text searchable fact + userId: s.keyword(), // exact-match owner + kind: s.keyword(), // "preference" | "event" | "fact" ... + createdAt: s.number(), // epoch ms, sortable + }), + }); +} catch { + // Index already exists, safe to ignore when re-running setup. +} ``` @@ -98,7 +102,8 @@ redis = Redis.from_env() redis.search.create_index( name="memories", data_type="json", - prefix="memory:", + prefixes="memory:", + exists_ok=True, # idempotent: don't error if the index already exists schema={ "text": "TEXT", # full-text searchable fact "userId": "KEYWORD", # exact-match owner @@ -220,7 +225,7 @@ simplicity. ## Step 4: Remember new facts -After each exchange we ask the model to pull out durable facts — things worth +After each exchange we ask the model to pull out durable facts, the things worth remembering across sessions, not small talk. Each fact becomes a JSON document under the `memory:` prefix, so the index picks it up automatically. 
@@ -317,7 +322,7 @@ def remember(user_id: str, conversation: list[dict]) -> None: if already_known(user_id, text): continue memory_id = uuid.uuid4().hex - redis.json().set( + redis.json.set( f"memory:{user_id}:{memory_id}", "$", { @@ -424,7 +429,11 @@ expires, the facts learned there are recalled in the second: ```ts await chat("user-1", "session-a", "I'm vegetarian and I love spicy food."); -// ...later, a brand new session... + +// Redis Search indexes writes asynchronously, wait so the demo is deterministic. +await memories.waitIndexing(); + +// ...a brand new session... const reply = await chat("user-1", "session-b", "Suggest a dinner for me."); console.log(reply); // recalls "vegetarian" + "spicy" from long-term memory ``` @@ -433,7 +442,11 @@ console.log(reply); // recalls "vegetarian" + "spicy" from long-term memory ```python chat("user-1", "session-a", "I'm vegetarian and I love spicy food.") -# ...later, a brand new session... + +# Redis Search indexes writes asynchronously, wait so the demo is deterministic. +memories.wait_indexing() + +# ...a brand new session... reply = chat("user-1", "session-b", "Suggest a dinner for me.") print(reply) # recalls "vegetarian" + "spicy" from long-term memory ``` @@ -441,9 +454,17 @@ print(reply) # recalls "vegetarian" + "spicy" from long-term memory + +Redis Search indexes writes asynchronously: a `JSON.SET` returns before the +document is searchable. For a deterministic demo or test, call `waitIndexing()` / +`wait_indexing()` to block until pending updates are applied. In a real app the +next user turn normally arrives later than the indexing window, so an explicit +wait isn't needed. + + ## How it fits together -- **Working memory** lives under `chat:{sessionId}` with a TTL — fast to read, +- **Working memory** lives under `chat:{sessionId}` with a TTL: fast to read, self-expiring, scoped to one conversation. 
- **Long-term memory** lives under `memory:{userId}:{id}` and is searchable across sessions through the `memories` index. @@ -455,6 +476,6 @@ print(reply) # recalls "vegetarian" + "spicy" from long-term memory - Add a `kind` such as `"preference"` vs `"event"` and filter recall by it. - Boost recent memories with a [score function](/redis/search/querying#4-score-function). - Summarize older working-memory messages instead of dropping them. -- Stream the reply to a chat UI and animate it smoothly — see +- Stream the reply to a chat UI and animate it smoothly. See [Smooth Text Streaming in AI SDK v5](https://upstash.com/blog/smooth-streaming). - Learn more about what Redis Search can do in the [Search docs](/redis/search/introduction). From 5cd1e876eb47590b4dd7fddcc8a34aaa3a0439df Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 25 May 2026 10:43:40 +0000 Subject: [PATCH 4/4] chore(llms): regenerate llms.txt and llms-full.txt --- llms-full.txt | 86 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 61 insertions(+), 25 deletions(-) diff --git a/llms-full.txt b/llms-full.txt index c348095c..9884e52c 100644 --- a/llms-full.txt +++ b/llms-full.txt @@ -24452,7 +24452,7 @@ redis = Redis.from_env() index = redis.search.create_index( name="products", data_type="json", - prefix="product:", + prefixes="product:", schema={ "name": "TEXT", "description": "TEXT", @@ -24472,10 +24472,23 @@ SEARCH.CREATE products ON JSON PREFIX 1 product: SCHEMA name TEXT description TE + +Create an index once, not on every request. `createIndex` throws if an index with +the same name already exists. To make setup safely re-runnable, pass `exists_ok=True` +in the Python SDK, or wrap the call in a `try/catch` in TypeScript. + + ## 2. Add Data Add data using standard Redis JSON commands. Any key matching the index prefix will be automatically indexed. 
+ +Writes are indexed asynchronously: a `JSON.SET` returns before the document is +searchable. For demos and tests, call `waitIndexing()` / `wait_indexing()` to +block until pending updates are applied. In production, queries running on a later +request will normally hit an up-to-date index without waiting. + + @@ -24496,12 +24509,14 @@ await redis.json.set("product:2", "$", { price: 129.99, inStock: true, }); + +await index.waitIndexing(); ``` ```python -redis.json().set("product:1", "$", { +redis.json.set("product:1", "$", { "name": "Wireless Headphones", "description": "Premium noise-cancelling wireless headphones with 30-hour battery life", "category": "electronics", @@ -24509,7 +24524,7 @@ redis.json().set("product:1", "$", { "inStock": True, }) -redis.json().set("product:2", "$", { +redis.json.set("product:2", "$", { "name": "Running Shoes", "description": "Lightweight running shoes with advanced cushioning technology", "category": "sports", @@ -28276,9 +28291,9 @@ the prompt. In this tutorial we build a small but complete **agent memory** layer on Upstash Redis, with two tiers: -* **Working memory** — the running conversation for the current session, stored +* **Working memory**: the running conversation for the current session, stored in a single Redis key with a TTL so it expires on its own. -* **Long-term memory** — durable facts about the user (preferences, events, +* **Long-term memory**: durable facts about the user (preferences, events, decisions) stored as JSON documents and recalled with [Redis Search](/docs/redis/search/introduction) full-text queries. @@ -28287,7 +28302,7 @@ those plus the recent conversation, then **remembers** any new facts worth keepi This tutorial uses OpenAI for the chat and fact-extraction calls, but the memory -layer itself is model-agnostic — swap in any LLM you like. +layer itself is model-agnostic, so swap in any LLM you like. 
## Prerequisites @@ -28328,7 +28343,7 @@ the `text` field for full-text recall, and keep `userId` and `kind` as exact-mat keywords so we can scope a search to a single user. `createdAt` is a sortable number we can use to favor recent memories. -Create the index **once** (e.g. in a setup script) — not on every request. +Create the index **once** (e.g. in a setup script), not on every request. @@ -28339,17 +28354,21 @@ import { Redis, s } from "@upstash/redis"; const redis = Redis.fromEnv(); -await redis.search.createIndex({ - name: "memories", - dataType: "json", - prefix: "memory:", - schema: s.object({ - text: s.string(), // full-text searchable fact - userId: s.keyword(), // exact-match owner - kind: s.keyword(), // "preference" | "event" | "fact" ... - createdAt: s.number(), // epoch ms, sortable - }), -}); +try { + await redis.search.createIndex({ + name: "memories", + dataType: "json", + prefix: "memory:", + schema: s.object({ + text: s.string(), // full-text searchable fact + userId: s.keyword(), // exact-match owner + kind: s.keyword(), // "preference" | "event" | "fact" ... + createdAt: s.number(), // epoch ms, sortable + }), + }); +} catch { + // Index already exists, safe to ignore when re-running setup. +} ``` @@ -28363,7 +28382,8 @@ redis = Redis.from_env() redis.search.create_index( name="memories", data_type="json", - prefix="memory:", + prefixes="memory:", + exists_ok=True, # idempotent: don't error if the index already exists schema={ "text": "TEXT", # full-text searchable fact "userId": "KEYWORD", # exact-match owner @@ -28482,7 +28502,7 @@ simplicity. ## Step 4: Remember new facts -After each exchange we ask the model to pull out durable facts — things worth +After each exchange we ask the model to pull out durable facts, the things worth remembering across sessions, not small talk. Each fact becomes a JSON document under the `memory:` prefix, so the index picks it up automatically. 
@@ -28577,7 +28597,7 @@ def remember(user_id: str, conversation: list[dict]) -> None: if already_known(user_id, text): continue memory_id = uuid.uuid4().hex - redis.json().set( + redis.json.set( f"memory:{user_id}:{memory_id}", "$", { @@ -28684,7 +28704,11 @@ expires, the facts learned there are recalled in the second: ```ts await chat("user-1", "session-a", "I'm vegetarian and I love spicy food."); -// ...later, a brand new session... + +// Redis Search indexes writes asynchronously, wait so the demo is deterministic. +await memories.waitIndexing(); + +// ...a brand new session... const reply = await chat("user-1", "session-b", "Suggest a dinner for me."); console.log(reply); // recalls "vegetarian" + "spicy" from long-term memory ``` @@ -28693,7 +28717,11 @@ console.log(reply); // recalls "vegetarian" + "spicy" from long-term memory ```python chat("user-1", "session-a", "I'm vegetarian and I love spicy food.") -# ...later, a brand new session... + +# Redis Search indexes writes asynchronously, wait so the demo is deterministic. +memories.wait_indexing() + +# ...a brand new session... reply = chat("user-1", "session-b", "Suggest a dinner for me.") print(reply) # recalls "vegetarian" + "spicy" from long-term memory ``` @@ -28701,9 +28729,17 @@ print(reply) # recalls "vegetarian" + "spicy" from long-term memory + +Redis Search indexes writes asynchronously: a `JSON.SET` returns before the +document is searchable. For a deterministic demo or test, call `waitIndexing()` / +`wait_indexing()` to block until pending updates are applied. In a real app the +next user turn normally arrives later than the indexing window, so an explicit +wait isn't needed. + + ## How it fits together -* **Working memory** lives under `chat:{sessionId}` with a TTL — fast to read, +* **Working memory** lives under `chat:{sessionId}` with a TTL: fast to read, self-expiring, scoped to one conversation. 
* **Long-term memory** lives under `memory:{userId}:{id}` and is searchable across sessions through the `memories` index. @@ -28715,7 +28751,7 @@ print(reply) # recalls "vegetarian" + "spicy" from long-term memory * Add a `kind` such as `"preference"` vs `"event"` and filter recall by it. * Boost recent memories with a [score function](/docs/redis/search/querying#4-score-function). * Summarize older working-memory messages instead of dropping them. -* Stream the reply to a chat UI and animate it smoothly — see +* Stream the reply to a chat UI and animate it smoothly. See [Smooth Text Streaming in AI SDK v5](https://upstash.com/blog/smooth-streaming). * Learn more about what Redis Search can do in the [Search docs](/docs/redis/search/introduction).