feat: add UPC to ASIN mapping and large file UPC analysis

Introduces the capability to resolve UPCs to ASINs using the Keepa API. This includes a new `upc-file` command for processing large Excel files of UPCs, a `upc` CLI tool for quick lookups, and API endpoints for web-based integration. The analysis pipeline was refactored into a reusable module to support both standard ASIN leads and new UPC-driven workflows.
This commit is contained in:
Victor Noguera
2026-04-16 23:06:55 -04:00
parent d25cf5d5ec
commit 32e7b0c485
14 changed files with 2278 additions and 250 deletions

View File

@@ -1,10 +1,21 @@
import { config } from "./config.ts";
import type { KeepaData } from "./types.ts";
import type { KeepaData, KeepaUpcLookupDetail } from "./types.ts";
// Root URL of the Keepa REST API; endpoint paths are appended to it.
const KEEPA_BASE = "https://api.keepa.com";
// Chunk size used when batching ASINs into a single product request.
const MAX_ASINS_PER_REQUEST = 100;
// Chunk size for UPC/EAN code lookups; currently the same value as the ASIN limit.
const MAX_CODES_PER_REQUEST = MAX_ASINS_PER_REQUEST;
// Total number of attempts (the first try included) made by fetchKeepaWithRetries.
const MAX_KEEPA_RETRIES = 4;
// Extra milliseconds added on top of every computed throttle wait.
const KEEP_RETRY_BUFFER_MS = 250;
// NOTE(review): not referenced in the visible part of this file; presumably the
// seller ID of Amazon.com itself — confirm at the use site.
const AMAZON_US_SELLER_ID = "ATVPDKIKX0DER";
// NOTE(review): not referenced in the visible part of this file; presumably the
// offset between Keepa's minute-based timestamps and Unix-epoch minutes —
// confirm against the Keepa API documentation.
const KEEPA_MINUTES_OFFSET = 21_564_000;
// Matches strings made of exactly 12, 13, or 14 digits.
const UPC_PATTERN = /^\d{12,14}$/;
/**
 * Fields of a Keepa JSON response that this module reads. Every field is
 * optional because successful bodies and error bodies are parsed into the
 * same shape.
 */
type KeepaApiResponse = {
// Product objects returned by a product request.
products?: Record<string, any>[];
// Token balance reported by Keepa; copied into the module-level `tokensLeft`.
tokensLeft?: number;
// Refill rate reported by Keepa; copied into the module-level `refillRate`
// and logged as a per-minute figure.
refillRate?: number;
// Time until the next token refill; computeWaitMsFromRefill treats it as milliseconds.
refillIn?: number;
};
// Token-based rate limiting: Keepa Pro = 1 token/min regeneration.
// Each product request costs 1 token regardless of ASIN count (up to 100).
@@ -35,6 +46,168 @@ async function waitForToken(): Promise<void> {
tokensLeft = 1;
}
/**
 * Returns a promise that settles after a timer of `ms` milliseconds fires.
 *
 * @param ms Delay handed to `setTimeout`.
 * @returns A promise that resolves with no value once the timer elapses.
 */
function wait(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
/**
 * Builds the URL for a Keepa `/product` request.
 *
 * The fixed parameters (API key, domain 1, 90-day stats, buy box data, 90 days
 * of history) come first; the identifier list is appended last as a single
 * comma-joined value under `queryParam`.
 *
 * @param queryParam Name of the identifier parameter: `"asin"` or `"code"`.
 * @param values Identifiers to request in this call.
 * @returns The absolute request URL with a URL-encoded query string.
 */
function buildProductUrl(
  queryParam: "asin" | "code",
  values: string[],
): string {
  const query = new URLSearchParams([
    ["key", config.keepaApiKey],
    ["domain", "1"],
    ["stats", "90"],
    ["buybox", "1"],
    ["days", "90"],
    [queryParam, values.join(",")],
  ]);
  return KEEPA_BASE + "/product?" + query.toString();
}
/**
 * Copies the token counters from a Keepa response into the module-level
 * `tokensLeft` / `refillRate` state. A counter that is missing (or null) in
 * the response leaves the current value as it is.
 *
 * @param data Parsed Keepa response, successful or error.
 */
function updateTokenState(data: KeepaApiResponse): void {
  // Falling back to the current value makes an absent field a no-op.
  tokensLeft = data.tokensLeft ?? tokensLeft;
  refillRate = data.refillRate ?? refillRate;
}
/**
 * Decides how long to pause before retrying a throttled request.
 *
 * When Keepa supplied a usable `refillIn` (a finite, non-negative number,
 * treated here as milliseconds) the wait is that value rounded up plus the
 * retry buffer. Otherwise the wait is estimated from the module-level
 * `refillRate` as the time for one token at that per-minute rate (the rate is
 * clamped to at least 1), again plus the buffer.
 *
 * @param refillIn Refill hint taken from the Keepa error payload, if any.
 * @returns Milliseconds to wait; never less than the retry buffer when the hint is usable.
 */
function computeWaitMsFromRefill(refillIn?: number): number {
  const hintUnusable =
    typeof refillIn !== "number" || !Number.isFinite(refillIn) || refillIn < 0;

  if (
    typeof refillIn !== "number" ||
    !Number.isFinite(refillIn) ||
    refillIn < 0 ||
    hintUnusable
  ) {
    // No trustworthy hint: estimate one token's worth of time from the rate.
    const tokensPerMinute = Math.max(1, refillRate);
    return Math.ceil((1 / tokensPerMinute) * 60_000) + KEEP_RETRY_BUFFER_MS;
  }

  const hintedWaitMs = Math.ceil(refillIn) + KEEP_RETRY_BUFFER_MS;
  return Math.max(hintedWaitMs, KEEP_RETRY_BUFFER_MS);
}
/**
 * Attempts to read a Keepa error body as a JSON object.
 *
 * Error bodies are not guaranteed to be JSON, so every failure mode maps to
 * `null` instead of throwing: invalid JSON, JSON primitives, `null`, and JSON
 * arrays (an array is an object to `typeof` but can never carry the token
 * fields the caller reads).
 *
 * @param text Raw response body.
 * @returns The parsed object, or `null` when the body is not a JSON object.
 */
function parseErrorPayload(text: string): KeepaApiResponse | null {
  let parsed: unknown;
  try {
    parsed = JSON.parse(text);
  } catch {
    return null;
  }

  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    return null;
  }
  // The fields are all optional and only read defensively by the caller, so
  // the assertion does not promise anything about the object's contents.
  return parsed as KeepaApiResponse;
}
/**
 * Performs a Keepa GET request, retrying when the API answers HTTP 429.
 *
 * Each attempt first awaits `waitForToken()`, then fetches `url` and records
 * the request time. A successful response is parsed, used to refresh the
 * token state, and returned. On a failed response the body is kept for the
 * error message and, when it parses as JSON, also used to refresh the token
 * state. Only status 429 is retried, and only while attempts remain; the
 * pause before the retry comes from `computeWaitMsFromRefill`.
 *
 * Exceptions raised by `fetch` itself or by reading the body are not caught
 * here and propagate to the caller.
 *
 * @param url Fully built request URL.
 * @param operationLabel Short description used in the throttle warning.
 * @returns The parsed body of the first successful response.
 * @throws Error carrying the status and body of the last failed response.
 */
async function fetchKeepaWithRetries(
  url: string,
  operationLabel: string,
): Promise<KeepaApiResponse> {
  let failure = "Unknown Keepa error";
  let attempt = 0;

  while (attempt < MAX_KEEPA_RETRIES) {
    attempt += 1;

    await waitForToken();
    const response = await fetch(url);
    lastRequestTime = Date.now();

    if (response.ok) {
      const body = (await response.json()) as KeepaApiResponse;
      updateTokenState(body);
      return body;
    }

    const rawBody = await response.text();
    const errorPayload = parseErrorPayload(rawBody);
    if (errorPayload !== null) {
      updateTokenState(errorPayload);
    }
    failure = `Keepa API error ${response.status}: ${rawBody}`;

    const canRetry = response.status === 429 && attempt < MAX_KEEPA_RETRIES;
    if (!canRetry) {
      break;
    }

    const delayMs = computeWaitMsFromRefill(errorPayload?.refillIn);
    // Cap the local balance at zero after a throttle response, whatever the
    // payload reported.
    tokensLeft = Math.min(tokensLeft, 0);
    console.warn(
      `Keepa throttled during ${operationLabel} (attempt ${attempt}/${MAX_KEEPA_RETRIES}). Waiting ${Math.ceil(delayMs / 1000)}s before retry...`,
    );
    await wait(delayMs);
  }

  throw new Error(failure);
}
/**
 * Normalizes a caller-supplied code by removing leading and trailing
 * whitespace. Interior characters are left untouched.
 *
 * @param input Code exactly as received.
 * @returns The trimmed code.
 */
function normalizeUpc(input: string): string {
  const trimmed = input.trim();
  return trimmed;
}
/**
 * Tells whether a code has the shape this module accepts, as defined by
 * `UPC_PATTERN`.
 *
 * @param value Already-normalized code.
 * @returns `true` when the whole string matches the pattern.
 */
function isValidUpc(value: string): boolean {
  const hasAcceptedShape = UPC_PATTERN.test(value);
  return hasAcceptedShape;
}
/**
 * Reduces a code string to its digits, dropping every non-digit character
 * such as hyphens, spaces, or letters.
 *
 * @param value Code text taken from a Keepa product.
 * @returns The digits of `value` in their original order; possibly empty.
 */
function normalizeCodeFromKeepa(value: string): string {
  const digitsOnly = value.replace(/\D/g, "");
  return digitsOnly;
}
/**
 * Adds every valid code found in `value` to `target`.
 *
 * Accepted shapes:
 * - arrays, walked recursively;
 * - finite numbers, truncated to an integer and rendered as text;
 * - strings, split on whitespace, commas, semicolons, and pipes.
 * Anything else is ignored. Each candidate is stripped to its digits and kept
 * only when it passes `isValidUpc`.
 *
 * @param value Field value of unknown shape.
 * @param target Set that receives the normalized codes.
 */
function collectCodes(value: unknown, target: Set<string>): void {
  const addIfValid = (candidate: string): void => {
    const digits = normalizeCodeFromKeepa(candidate);
    if (isValidUpc(digits)) target.add(digits);
  };

  if (Array.isArray(value)) {
    value.forEach((entry) => {
      collectCodes(entry, target);
    });
    return;
  }

  switch (typeof value) {
    case "number":
      if (Number.isFinite(value)) {
        addIfValid(String(Math.trunc(value)));
      }
      return;
    case "string":
      value
        .split(/[\s,;|]+/)
        .filter((piece) => piece.length > 0)
        .forEach((piece) => {
          addIfValid(piece);
        });
      return;
    default:
      return;
  }
}
/**
 * Gathers the distinct valid codes a Keepa product carries in its UPC, EAN,
 * and GTIN fields (both the list and the singular variants).
 *
 * @param product Raw Keepa product object.
 * @returns Unique normalized codes in first-seen order.
 */
function extractUpcsFromProduct(product: Record<string, any>): string[] {
  const codeFields = [
    "upcList",
    "upc",
    "eanList",
    "ean",
    "gtinList",
    "gtin",
  ] as const;

  const found = new Set<string>();
  for (const field of codeFields) {
    collectCodes(product[field], found);
  }
  return [...found];
}
/**
 * Creates the lookup record for a code that did not resolve to one ASIN.
 *
 * The same code is stored as both the requested and the normalized value, and
 * the ASIN and Keepa data are always `null`.
 *
 * @param upc Code the record is about.
 * @param status Why the lookup did not produce a single ASIN.
 * @param reason Human-readable explanation.
 * @param candidateAsins ASINs that matched, if any; defaults to an empty list.
 * @returns A failure-shaped lookup detail.
 */
function buildFailureDetail(
  upc: string,
  status: "invalid_upc" | "not_found" | "multiple_asins" | "request_failed",
  reason: string,
  candidateAsins: string[] = [],
): KeepaUpcLookupDetail {
  // Property order is kept stable so serialized output does not change.
  const failure: KeepaUpcLookupDetail = {
    requestedUpc: upc,
    normalizedUpc: upc,
    status,
    asin: null,
    candidateAsins,
    keepaData: null,
    reason,
  };
  return failure;
}
export async function fetchKeepaDataBatch(
asins: string[],
): Promise<Map<string, KeepaData>> {
@@ -43,32 +216,13 @@ export async function fetchKeepaDataBatch(
// Split into chunks of MAX_ASINS_PER_REQUEST
for (let i = 0; i < asins.length; i += MAX_ASINS_PER_REQUEST) {
const chunk = asins.slice(i, i + MAX_ASINS_PER_REQUEST);
await waitForToken();
const asinParam = chunk.join(",");
const url = `${KEEPA_BASE}/product?key=${config.keepaApiKey}&domain=1&asin=${asinParam}&stats=90&buybox=1&days=90`;
const url = buildProductUrl("asin", chunk);
console.log(
`Keepa: fetching ${chunk.length} ASINs (tokens left: ${tokensLeft})...`,
);
const res = await fetch(url);
lastRequestTime = Date.now();
if (!res.ok) {
const text = await res.text();
throw new Error(`Keepa API error ${res.status}: ${text}`);
}
const data = (await res.json()) as {
products?: Record<string, any>[];
tokensLeft?: number;
refillRate?: number;
};
// Update token state from API response
if (data.tokensLeft != null) tokensLeft = data.tokensLeft;
if (data.refillRate != null) refillRate = data.refillRate;
const data = await fetchKeepaWithRetries(url, "ASIN batch fetch");
console.log(
`Keepa: ${data.products?.length ?? 0} products returned, ${tokensLeft} tokens remaining (refill: ${refillRate}/min)`,
@@ -86,6 +240,133 @@ export async function fetchKeepaDataBatch(
return results;
}
/**
 * Resolves UPC/EAN/GTIN codes to ASINs through Keepa's product endpoint.
 *
 * Each input is trimmed and must consist of 12–14 digits; anything else is
 * recorded as `invalid_upc` without contacting Keepa. Valid codes are
 * de-duplicated and queried in chunks of `MAX_CODES_PER_REQUEST`. For every
 * requested code the result is one of:
 * - `found` — exactly one ASIN carries the code;
 * - `multiple_asins` — several ASINs carry it (listed in `candidateAsins`);
 * - `not_found` — no returned product carries it;
 * - `request_failed` — the chunk's request (or its processing) threw; the
 *   error message is stored as the reason and later chunks still run.
 *
 * Requested codes and product codes are compared in 14-digit, zero-padded
 * form, so a 12-digit UPC also matches a product that reports the same item
 * only as a zero-padded EAN-13 or GTIN-14.
 *
 * @param upcs Codes as supplied by the caller.
 * @returns Map keyed by the trimmed input code, one entry per distinct code.
 */
export async function lookupKeepaUpcs(
  upcs: string[],
): Promise<Map<string, KeepaUpcLookupDetail>> {
  // GTIN-12 (UPC-A), GTIN-13 (EAN) and GTIN-14 denote the same item once
  // left-padded with zeros, so 14 digits is the common comparison form.
  const toGtin14 = (code: string): string => code.padStart(14, "0");

  const details = new Map<string, KeepaUpcLookupDetail>();
  const validUpcs: string[] = [];
  const seenValid = new Set<string>();

  for (const rawUpc of upcs) {
    const normalized = normalizeUpc(rawUpc);
    if (!isValidUpc(normalized)) {
      // Report each distinct invalid input once.
      if (!details.has(normalized)) {
        details.set(
          normalized,
          buildFailureDetail(
            normalized,
            "invalid_upc",
            "UPC must be 12, 13, or 14 digits",
          ),
        );
      }
      continue;
    }
    if (seenValid.has(normalized)) continue;
    seenValid.add(normalized);
    validUpcs.push(normalized);
  }

  for (let i = 0; i < validUpcs.length; i += MAX_CODES_PER_REQUEST) {
    const chunk = validUpcs.slice(i, i + MAX_CODES_PER_REQUEST);

    // Canonical GTIN-14 -> requested codes of this chunk. Two differently
    // padded inputs for the same item both receive the match.
    const requestedByGtin = new Map<string, string[]>();
    for (const upc of chunk) {
      const gtin = toGtin14(upc);
      const sameItem = requestedByGtin.get(gtin);
      if (sameItem) {
        sameItem.push(upc);
      } else {
        requestedByGtin.set(gtin, [upc]);
      }
    }

    const url = buildProductUrl("code", chunk);
    console.log(
      `Keepa: mapping ${chunk.length} UPCs to ASINs (tokens left: ${tokensLeft})...`,
    );

    try {
      const data = await fetchKeepaWithRetries(url, "UPC code lookup");
      console.log(
        `Keepa: ${data.products?.length ?? 0} products returned for UPC query, ${tokensLeft} tokens remaining (refill: ${refillRate}/min)`,
      );

      // Requested code -> (ASIN -> parsed product) for every product carrying it.
      const byUpc = new Map<string, Map<string, KeepaData>>();
      for (const product of data.products ?? []) {
        const asin = String(product.asin ?? "").trim();
        if (!asin) continue;
        const keepaData = parseKeepaProduct(product);

        for (const productCode of extractUpcsFromProduct(product)) {
          const requested = requestedByGtin.get(toGtin14(productCode)) ?? [];
          for (const upc of requested) {
            let asinMap = byUpc.get(upc);
            if (!asinMap) {
              asinMap = new Map<string, KeepaData>();
              byUpc.set(upc, asinMap);
            }
            asinMap.set(asin, keepaData);
          }
        }
      }

      for (const upc of chunk) {
        const asinMap = byUpc.get(upc);
        const matches = asinMap ? Array.from(asinMap.entries()) : [];
        const firstMatch = matches[0];

        if (firstMatch === undefined) {
          details.set(
            upc,
            buildFailureDetail(
              upc,
              "not_found",
              "No Keepa product matched this UPC",
            ),
          );
          continue;
        }

        if (matches.length > 1) {
          const candidateAsins = matches.map(([candidate]) => candidate);
          details.set(
            upc,
            buildFailureDetail(
              upc,
              "multiple_asins",
              `UPC matched multiple ASINs (${candidateAsins.length})`,
              candidateAsins,
            ),
          );
          continue;
        }

        const [asin, keepaData] = firstMatch;
        details.set(upc, {
          requestedUpc: upc,
          normalizedUpc: upc,
          status: "found",
          asin,
          candidateAsins: [asin],
          keepaData: keepaData ?? null,
        });
      }
    } catch (error) {
      // One failed chunk must not abort the rest of a large lookup: mark its
      // codes as failed and continue with the next chunk.
      const reason = error instanceof Error ? error.message : String(error);
      console.warn(
        `Keepa UPC chunk failed (offset ${i}, size ${chunk.length}): ${reason}`,
      );
      for (const upc of chunk) {
        details.set(upc, buildFailureDetail(upc, "request_failed", reason));
      }
    }
  }

  return details;
}
/**
 * Convenience wrapper around `lookupKeepaUpcs` that keeps only the codes
 * which resolved to exactly one ASIN.
 *
 * @param upcs Codes as supplied by the caller.
 * @returns Map from code to ASIN; codes with any other lookup status are omitted.
 */
export async function mapUpcsToAsins(
  upcs: string[],
): Promise<Map<string, string>> {
  const lookups = await lookupKeepaUpcs(upcs);
  const resolved = new Map<string, string>();

  lookups.forEach((detail, upc) => {
    if (detail.status !== "found") return;
    if (!detail.asin) return;
    resolved.set(upc, detail.asin);
  });

  return resolved;
}
function parseKeepaProduct(product: Record<string, any>): KeepaData {
const stats = product.stats;
const csv = product.csv;