perf: optimize Keepa UPC lookups with lightweight queries and caching

Reduces API token consumption by disabling stats and buybox data for UPC-to-ASIN mapping requests. Additionally, introduces a run-level cache to avoid redundant lookups for the same UPC across different batch chunks.
This commit is contained in:
Victor Noguera
2026-04-17 01:41:01 -04:00
parent 072a501102
commit 9b832b7839
3 changed files with 103 additions and 10 deletions

View File

@@ -198,3 +198,44 @@ test("lookupKeepaUpcs retries on 429 and succeeds after refill wait", async () =
expect(details.get(targetUpc)?.status).toBe("found");
expect(details.get(targetUpc)?.asin).toBe("B000RETRY01");
});
// Verifies that UPC -> ASIN mapping requests carry only the `code` parameter
// and none of the heavier `stats` / `buybox` / `days` parameters.
test("lookupKeepaUpcs uses lightweight query params for code mapping", async () => {
  const targetUpc = "555555555555";

  // fetch() may be handed a string, a URL, or a Request; reduce all to a string.
  const toUrlString = (input: string | URL | Request): string => {
    if (typeof input === "string") {
      return input;
    }
    if (input instanceof URL) {
      return input.toString();
    }
    return input.url;
  };

  const fetchMock = mock(async (input: string | URL | Request) => {
    const { searchParams } = new URL(toUrlString(input));

    expect(searchParams.get("code")).toBe(targetUpc);
    // The heavy parameters must be absent, checked in the same order as before.
    for (const heavyParam of ["stats", "buybox", "days"]) {
      expect(searchParams.has(heavyParam)).toBe(false);
    }

    const payload = {
      products: [
        {
          asin: "B000LIGHT01",
          upcList: [targetUpc],
          categoryTree: [{ name: "Test Category" }],
        },
      ],
      tokensLeft: 10,
      refillRate: 21,
    };
    return new Response(JSON.stringify(payload), { status: 200 });
  });

  globalThis.fetch = fetchMock as unknown as typeof globalThis.fetch;

  const details = await lookupKeepaUpcs([targetUpc]);

  // Exactly one HTTP call is expected for a single UPC.
  expect(fetchMock.mock.calls.length).toBe(1);
  expect(details.get(targetUpc)?.status).toBe("found");
  expect(details.get(targetUpc)?.asin).toBe("B000LIGHT01");
});

View File

@@ -17,9 +17,9 @@ type KeepaApiResponse = {
refillIn?: number;
};
// Token-based rate limiting: Keepa Pro = 1 token/min regeneration.
// Each product request costs 1 token regardless of ASIN count (up to 100).
// The API response includes tokensLeft and refillRate — we use those to pace.
// Token-based rate limiting driven by the tokensLeft/refillRate fields in Keepa's API responses.
// The actual token cost of a request can be greater than 1, depending on endpoint parameters and payload.
// The client paces requests using tokensLeft/refillRate/refillIn to avoid 429 bursts.
let tokensLeft = 1; // Conservative start; updated from API response
let refillRate = 1; // tokens per minute, updated from API response
let lastRequestTime = 0; // NOTE(review): presumably the timestamp of the most recent request (0 = none yet) — confirm against the pacing code
@@ -53,14 +53,30 @@ function wait(ms: number): Promise<void> {
/**
 * Builds the Keepa `/product` request URL for a batch of ASINs or product codes.
 *
 * Only `key` and `domain` are always sent. The `stats`, `days` and `buybox`
 * parameters are added on request, so callers that need nothing more than an
 * identifier mapping can issue a lightweight query.
 *
 * @param queryParam - Query parameter that carries the identifiers (`"asin"` or `"code"`).
 * @param values - Identifiers to look up; joined with commas into one parameter value.
 * @param options - Optional toggles for the heavier query parameters:
 *   - `includeStats` (default `true`): adds `stats` and `days`, both set to `days`.
 *   - `includeBuybox` (default `true`): adds `buybox=1`.
 *   - `days` (default `90`): value used for `stats` and `days` when stats are included.
 * @returns The request URL, including the encoded query string.
 */
function buildProductUrl(
  queryParam: "asin" | "code",
  values: string[],
  options?: {
    includeStats?: boolean;
    includeBuybox?: boolean;
    days?: number;
  },
): string {
  const includeStats = options?.includeStats ?? true;
  const includeBuybox = options?.includeBuybox ?? true;
  const days = options?.days ?? 90;

  // Seed only the mandatory parameters. Pre-populating stats/buybox/days here
  // would make the opt-out flags below ineffective.
  const params = new URLSearchParams({
    key: config.keepaApiKey,
    domain: "1",
  });

  if (includeStats) {
    params.set("stats", String(days));
    params.set("days", String(days));
  }

  if (includeBuybox) {
    params.set("buybox", "1");
  }

  params.set(queryParam, values.join(","));

  return `${KEEPA_BASE}/product?${params.toString()}`;
}
@@ -216,7 +232,11 @@ export async function fetchKeepaDataBatch(
// Split into chunks of MAX_ASINS_PER_REQUEST
for (let i = 0; i < asins.length; i += MAX_ASINS_PER_REQUEST) {
const chunk = asins.slice(i, i + MAX_ASINS_PER_REQUEST);
const url = buildProductUrl("asin", chunk);
const url = buildProductUrl("asin", chunk, {
includeStats: true,
includeBuybox: true,
days: 90,
});
console.log(
`Keepa: fetching ${chunk.length} ASINs (tokens left: ${tokensLeft})...`,
@@ -271,7 +291,10 @@ export async function lookupKeepaUpcs(
for (let i = 0; i < validUpcs.length; i += MAX_CODES_PER_REQUEST) {
const chunk = validUpcs.slice(i, i + MAX_CODES_PER_REQUEST);
const chunkSet = new Set(chunk);
const url = buildProductUrl("code", chunk);
const url = buildProductUrl("code", chunk, {
includeStats: false,
includeBuybox: false,
});
console.log(
`Keepa: mapping ${chunk.length} UPCs to ASINs (tokens left: ${tokensLeft})...`,

View File

@@ -162,11 +162,28 @@ function createStatusCounter(): Record<KeepaUpcLookupStatus, number> {
async function lookupUpcsWithChunking(
rows: UpcInputRow[],
lookupBatchSize: number,
runCache: Map<string, KeepaUpcLookupDetail>,
): Promise<Map<string, KeepaUpcLookupDetail>> {
const uniqueUpcs = Array.from(new Set(rows.map((row) => row.upc)));
const chunks = chunkArray(uniqueUpcs, lookupBatchSize);
const missingUpcs = uniqueUpcs.filter((upc) => !runCache.has(upc));
const chunks = chunkArray(missingUpcs, lookupBatchSize);
const details = new Map<string, KeepaUpcLookupDetail>();
const cacheHits = uniqueUpcs.length - missingUpcs.length;
if (cacheHits > 0) {
console.log(
` Reusing cached UPC lookup results for ${cacheHits}/${uniqueUpcs.length} UPCs in this batch.`,
);
}
if (missingUpcs.length === 0) {
for (const upc of uniqueUpcs) {
const detail = runCache.get(upc);
if (detail) details.set(upc, detail);
}
return details;
}
for (let i = 0; i < chunks.length; i++) {
const chunk = chunks[i]!;
console.log(
@@ -175,6 +192,13 @@ async function lookupUpcsWithChunking(
const chunkDetails = await lookupKeepaUpcs(chunk);
for (const [upc, detail] of chunkDetails.entries()) {
runCache.set(upc, detail);
}
}
for (const upc of uniqueUpcs) {
const detail = runCache.get(upc);
if (detail) {
details.set(upc, detail);
}
}
@@ -222,6 +246,7 @@ export async function runUpcFileAnalysis(
const unresolvedByStatus = createStatusCounter();
const printableSample = [];
const upcLookupCache = new Map<string, KeepaUpcLookupDetail>();
let processedRows = 0;
let matchedRows = 0;
@@ -236,7 +261,11 @@ export async function runUpcFileAnalysis(
);
processedRows += rows.length;
const detailMap = await lookupUpcsWithChunking(rows, lookupBatchSize);
const detailMap = await lookupUpcsWithChunking(
rows,
lookupBatchSize,
upcLookupCache,
);
const matchedProducts: ProductRecord[] = [];
for (const row of rows) {