Introduces the capability to resolve UPCs to ASINs using the Keepa API. This includes a new `upc-file` command for processing large Excel files of UPCs, a `upc` CLI tool for quick lookups, and API endpoints for web-based integration. The analysis pipeline was refactored into a reusable module to support both standard ASIN leads and new UPC-driven workflows.
540 lines
16 KiB
TypeScript
540 lines
16 KiB
TypeScript
import { config } from "./config.ts";
|
|
import type { KeepaData, KeepaUpcLookupDetail } from "./types.ts";
|
|
|
|
/** Root URL of the Keepa REST API; endpoint paths are appended to it. */
const KEEPA_BASE = "https://api.keepa.com";

/** Largest number of ASINs placed in a single product request. */
const MAX_ASINS_PER_REQUEST = 100;

/** Largest number of UPC codes placed in a single `code` request (same cap as ASINs). */
const MAX_CODES_PER_REQUEST = MAX_ASINS_PER_REQUEST;

/** Total attempts (the first try included) made for one Keepa request. */
const MAX_KEEPA_RETRIES = 4;

/** Safety margin, in milliseconds, added to every computed throttle wait. */
const KEEP_RETRY_BUFFER_MS = 250;

/** Seller id that buy-box data is compared against to recognise Amazon US. */
const AMAZON_US_SELLER_ID = "ATVPDKIKX0DER";

/** Subtracted from Unix-epoch minutes to obtain a Keepa minute timestamp. */
const KEEPA_MINUTES_OFFSET = 21_564_000;

/** Matches strings consisting of exactly 12 to 14 decimal digits. */
const UPC_PATTERN = /^\d{12,14}$/;
|
|
|
|
/**
 * The subset of a Keepa JSON response body that this module reads.
 * Every field is optional because error payloads are parsed into the same shape.
 */
type KeepaApiResponse = {
  /** Raw product objects; individual fields are picked out by the parsers in this file. */
  products?: Array<Record<string, any>>;
  /** Token balance reported by Keepa; copied into the module's pacing state. */
  tokensLeft?: number;
  /** Token refill rate reported by Keepa; this module treats it as tokens per minute. */
  refillRate?: number;
  /** Refill hint; this module treats it as milliseconds when sizing a throttle wait. */
  refillIn?: number;
};
|
|
|
|
// Client-side view of the Keepa token bucket, used to pace outgoing requests.
// The values below are only starting guesses: `tokensLeft` and `refillRate`
// are overwritten from the matching fields of every parsed Keepa response.
// NOTE(review): the previous comment here stated that Keepa Pro regenerates
// 1 token/min and that a product request costs 1 token regardless of ASIN
// count (up to 100) — confirm both against Keepa's token-cost documentation.

/** Last known token balance; starts at 1 so the very first request is allowed. */
let tokensLeft = 1;

/** Tokens regenerated per minute; refreshed from API responses. */
let refillRate = 1;

/** `Date.now()` timestamp of the most recent request; 0 until one has been made. */
let lastRequestTime = 0;
|
|
|
|
/**
 * Blocks until the locally tracked token balance is positive again.
 *
 * Returns immediately while `tokensLeft > 0`. Otherwise it credits the tokens
 * regenerated since `lastRequestTime` and, if the balance is still not
 * positive, sleeps for the remainder of the time needed to regenerate the
 * whole deficit plus one token.
 *
 * Differences from the previous implementation:
 * - a negative balance that is only partly regenerated no longer returns
 *   early with `tokensLeft <= 0`;
 * - the sleep covers the full deficit instead of a single token;
 * - a zero, negative or non-finite `refillRate` falls back to 1 token/min
 *   instead of producing an infinite wait.
 */
async function waitForToken(): Promise<void> {
  if (tokensLeft > 0) return;

  const tokensPerMinute =
    Number.isFinite(refillRate) && refillRate > 0 ? refillRate : 1;
  const sinceLastRequestMs = Date.now() - lastRequestTime;
  const regenerated = Math.floor(
    (sinceLastRequestMs / 60_000) * tokensPerMinute,
  );

  // Only proceed without sleeping when regeneration actually clears the deficit.
  if (tokensLeft + regenerated > 0) {
    tokensLeft += regenerated;
    return;
  }

  // Tokens required to bring the balance up to exactly one.
  const tokensNeeded = 1 - tokensLeft;
  const waitMs =
    Math.ceil((tokensNeeded / tokensPerMinute) * 60_000) - sinceLastRequestMs;
  if (waitMs > 0) {
    console.log(
      `Keepa tokens exhausted. Waiting ${Math.ceil(waitMs / 1000)}s for token regeneration...`,
    );
    await wait(waitMs);
  }
  tokensLeft = 1;
}
|
|
|
|
/**
 * Returns a promise that resolves once a timer of `ms` milliseconds fires.
 *
 * @param ms Delay handed to `setTimeout`.
 */
function wait(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
|
|
|
/**
 * Builds the URL for a Keepa `/product` request.
 *
 * The query string carries the API key from `config`, a fixed set of options
 * (`domain=1`, `stats=90`, `buybox=1`, `days=90`) and finally the identifiers,
 * comma-joined, under either the `asin` or the `code` parameter.
 *
 * @param queryParam Name of the identifier parameter to use.
 * @param values Identifiers to request.
 * @returns The absolute request URL.
 */
function buildProductUrl(
  queryParam: "asin" | "code",
  values: string[],
): string {
  const query = new URLSearchParams([
    ["key", config.keepaApiKey],
    ["domain", "1"],
    ["stats", "90"],
    ["buybox", "1"],
    ["days", "90"],
    [queryParam, values.join(",")],
  ]);
  return `${KEEPA_BASE}/product?${query.toString()}`;
}
|
|
|
|
/**
 * Copies the token balance and refill rate out of a Keepa response into the
 * module-level pacing state. A field that is `null` or `undefined` leaves the
 * corresponding state variable untouched.
 *
 * @param data Parsed Keepa response (success or error payload).
 */
function updateTokenState(data: KeepaApiResponse): void {
  const { tokensLeft: reportedTokens, refillRate: reportedRate } = data;

  if (reportedTokens !== undefined && reportedTokens !== null) {
    tokensLeft = reportedTokens;
  }
  if (reportedRate !== undefined && reportedRate !== null) {
    refillRate = reportedRate;
  }
}
|
|
|
|
/**
 * Decides how long to sleep before retrying a throttled request.
 *
 * When `refillIn` is a finite, non-negative number it is rounded up and used
 * as the wait. Otherwise the wait is the time one token takes to regenerate at
 * the current `refillRate`, with the rate clamped to at least 1 per minute.
 * `KEEP_RETRY_BUFFER_MS` is added in both cases.
 *
 * @param refillIn Refill hint from the Keepa payload, if one was present.
 * @returns Milliseconds to wait.
 */
function computeWaitMsFromRefill(refillIn?: number): number {
  if (
    typeof refillIn !== "number" ||
    !Number.isFinite(refillIn) ||
    refillIn < 0
  ) {
    const tokensPerMinute = Math.max(1, refillRate);
    return Math.ceil((1 / tokensPerMinute) * 60_000) + KEEP_RETRY_BUFFER_MS;
  }

  // The hint is non-negative here, so the sum can never fall below the buffer.
  return Math.ceil(refillIn) + KEEP_RETRY_BUFFER_MS;
}
|
|
|
|
/**
 * Tries to read an error response body as a Keepa JSON payload.
 *
 * @param text Raw response body.
 * @returns The decoded value when it is a non-null object, otherwise `null`
 *          (including when the text is not valid JSON).
 */
function parseErrorPayload(text: string): KeepaApiResponse | null {
  let decoded: unknown;
  try {
    decoded = JSON.parse(text);
  } catch {
    return null;
  }

  if (decoded === null || typeof decoded !== "object") {
    return null;
  }
  return decoded as KeepaApiResponse;
}
|
|
|
|
/**
 * Performs a GET against Keepa, retrying only when the API answers HTTP 429.
 *
 * Each attempt first waits for a token, then records the request time. A
 * successful response is parsed, used to refresh the token state and returned.
 * On a failed response the body is parsed (when it is JSON) to refresh the
 * token state; a 429 with attempts remaining sleeps for the computed refill
 * wait and tries again, anything else ends the loop.
 *
 * @param url Fully built request URL.
 * @param operationLabel Short description used in the throttle warning.
 * @returns The parsed body of the first successful response.
 * @throws Error carrying the status and body of the last failed response.
 */
async function fetchKeepaWithRetries(
  url: string,
  operationLabel: string,
): Promise<KeepaApiResponse> {
  let failure = "Unknown Keepa error";
  let attempt = 0;

  while (attempt < MAX_KEEPA_RETRIES) {
    attempt += 1;
    await waitForToken();

    const response = await fetch(url);
    lastRequestTime = Date.now();

    if (response.ok) {
      const body = (await response.json()) as KeepaApiResponse;
      updateTokenState(body);
      return body;
    }

    const rawBody = await response.text();
    const errorPayload = parseErrorPayload(rawBody);
    if (errorPayload !== null) {
      updateTokenState(errorPayload);
    }
    failure = `Keepa API error ${response.status}: ${rawBody}`;

    const throttled = response.status === 429;
    const attemptsRemain = attempt < MAX_KEEPA_RETRIES;
    if (!(throttled && attemptsRemain)) {
      break;
    }

    const delayMs = computeWaitMsFromRefill(errorPayload?.refillIn);
    // A throttle response means the balance cannot be positive, whatever was reported.
    tokensLeft = Math.min(tokensLeft, 0);
    console.warn(
      `Keepa throttled during ${operationLabel} (attempt ${attempt}/${MAX_KEEPA_RETRIES}). Waiting ${Math.ceil(delayMs / 1000)}s before retry...`,
    );
    await wait(delayMs);
  }

  throw new Error(failure);
}
|
|
|
|
/**
 * Normalizes caller-supplied UPC text by stripping leading and trailing
 * whitespace. Nothing else is changed.
 */
function normalizeUpc(input: string): string {
  const withoutPadding = input.trim();
  return withoutPadding;
}
|
|
|
|
/**
 * Reports whether `value` matches `UPC_PATTERN`.
 */
function isValidUpc(value: string): boolean {
  return UPC_PATTERN.exec(value) !== null;
}
|
|
|
|
/**
 * Reduces a code string to its ASCII digits, dropping every other character.
 */
function normalizeCodeFromKeepa(value: string): string {
  const digits = value.match(/\d/g) ?? [];
  return digits.join("");
}
|
|
|
|
/**
 * Walks a loosely typed value and adds every valid code it contains to `target`.
 *
 * - Arrays are visited element by element, recursively.
 * - Finite numbers are truncated to an integer and treated as a single code.
 * - Strings are split on whitespace, commas, semicolons and pipes.
 * - Anything else is ignored.
 *
 * Each candidate is reduced to its digits and kept only if `isValidUpc`
 * accepts it.
 *
 * @param value Value to scan.
 * @param target Set that receives the accepted codes (mutated in place).
 */
function collectCodes(value: unknown, target: Set<string>): void {
  if (Array.isArray(value)) {
    value.forEach((entry) => collectCodes(entry, target));
    return;
  }

  let pieces: string[];
  if (typeof value === "number") {
    if (!Number.isFinite(value)) return;
    pieces = [String(Math.trunc(value))];
  } else if (typeof value === "string") {
    pieces = value.split(/[\s,;|]+/);
  } else {
    return;
  }

  for (const piece of pieces) {
    if (piece === "") continue;
    const digits = normalizeCodeFromKeepa(piece);
    if (isValidUpc(digits)) {
      target.add(digits);
    }
  }
}
|
|
|
|
/**
 * Gathers the distinct valid codes found in a product's `upcList`, `upc`,
 * `eanList`, `ean`, `gtinList` and `gtin` fields, scanned in that order.
 *
 * @param product Raw Keepa product object.
 * @returns Codes in first-seen order, without duplicates.
 */
function extractUpcsFromProduct(product: Record<string, any>): string[] {
  const codeFields = [
    "upcList",
    "upc",
    "eanList",
    "ean",
    "gtinList",
    "gtin",
  ];

  const found = new Set<string>();
  for (const field of codeFields) {
    collectCodes(product[field], found);
  }
  return [...found];
}
|
|
|
|
/**
 * Creates the lookup record for a UPC that could not be resolved to one ASIN.
 *
 * The same string is stored as both `requestedUpc` and `normalizedUpc`;
 * `asin` and `keepaData` are always `null`.
 *
 * @param upc UPC the record describes.
 * @param status Failure category.
 * @param reason Human-readable explanation.
 * @param candidateAsins ASINs that matched, if any; defaults to an empty list.
 */
function buildFailureDetail(
  upc: string,
  status: "invalid_upc" | "not_found" | "multiple_asins" | "request_failed",
  reason: string,
  candidateAsins: string[] = [],
): KeepaUpcLookupDetail {
  const detail: KeepaUpcLookupDetail = {
    requestedUpc: upc,
    normalizedUpc: upc,
    status,
    asin: null,
    candidateAsins,
    keepaData: null,
    reason,
  };
  return detail;
}
|
|
|
|
/**
 * Fetches and parses Keepa data for a list of ASINs.
 *
 * The list is sent in consecutive slices of at most `MAX_ASINS_PER_REQUEST`,
 * one request after another. Products without an `asin` are skipped.
 *
 * @param asins ASINs to look up.
 * @returns Parsed Keepa data keyed by the ASIN Keepa reported.
 * @throws Whatever `fetchKeepaWithRetries` throws; no batch failure is caught here.
 */
export async function fetchKeepaDataBatch(
  asins: string[],
): Promise<Map<string, KeepaData>> {
  const parsedByAsin = new Map<string, KeepaData>();

  let offset = 0;
  while (offset < asins.length) {
    const batch = asins.slice(offset, offset + MAX_ASINS_PER_REQUEST);
    offset += MAX_ASINS_PER_REQUEST;

    const requestUrl = buildProductUrl("asin", batch);
    console.log(
      `Keepa: fetching ${batch.length} ASINs (tokens left: ${tokensLeft})...`,
    );

    const response = await fetchKeepaWithRetries(
      requestUrl,
      "ASIN batch fetch",
    );
    console.log(
      `Keepa: ${response.products?.length ?? 0} products returned, ${tokensLeft} tokens remaining (refill: ${refillRate}/min)`,
    );

    const returned = response.products;
    if (!returned) continue;

    for (const entry of returned) {
      const { asin } = entry;
      if (asin) {
        parsedByAsin.set(asin, parseKeepaProduct(entry));
      }
    }
  }

  return parsedByAsin;
}
|
|
|
|
/**
 * Resolves UPCs to ASINs through Keepa's `code` lookup.
 *
 * Inputs are trimmed first. Entries that fail validation are recorded as
 * `invalid_upc` (once per distinct value); valid entries are de-duplicated and
 * queried in slices of at most `MAX_CODES_PER_REQUEST`. Each returned product
 * is attributed to every requested UPC that appears among the codes extracted
 * from it. A UPC then ends up as:
 * - `found` when exactly one ASIN matched,
 * - `multiple_asins` when more than one matched,
 * - `not_found` when none matched,
 * - `request_failed` when anything in its slice threw (the error is logged,
 *   not rethrown, and later slices are still processed).
 *
 * @param upcs Raw UPC strings.
 * @returns One detail record per distinct trimmed input, keyed by that string.
 */
export async function lookupKeepaUpcs(
  upcs: string[],
): Promise<Map<string, KeepaUpcLookupDetail>> {
  const outcome = new Map<string, KeepaUpcLookupDetail>();

  // Phase 1: separate valid codes (insertion-ordered, unique) from invalid ones.
  const pending = new Set<string>();
  for (const entry of upcs) {
    const code = normalizeUpc(entry);
    if (isValidUpc(code)) {
      pending.add(code);
    } else if (!outcome.has(code)) {
      outcome.set(
        code,
        buildFailureDetail(
          code,
          "invalid_upc",
          "UPC must be 12, 13, or 14 digits",
        ),
      );
    }
  }
  const queue = Array.from(pending);

  // Turns the ASINs collected for one UPC into its final detail record.
  const describeMatch = (
    upc: string,
    matches: Map<string, KeepaData> | undefined,
  ): KeepaUpcLookupDetail => {
    const asins = matches ? Array.from(matches.keys()) : [];
    if (asins.length === 0) {
      return buildFailureDetail(
        upc,
        "not_found",
        "No Keepa product matched this UPC",
      );
    }
    if (asins.length > 1) {
      return buildFailureDetail(
        upc,
        "multiple_asins",
        `UPC matched multiple ASINs (${asins.length})`,
        asins,
      );
    }

    const asin = asins[0]!;
    return {
      requestedUpc: upc,
      normalizedUpc: upc,
      status: "found",
      asin,
      candidateAsins: [asin],
      keepaData: matches?.get(asin) ?? null,
    };
  };

  // Phase 2: query Keepa slice by slice.
  for (
    let offset = 0;
    offset < queue.length;
    offset += MAX_CODES_PER_REQUEST
  ) {
    const batch = queue.slice(offset, offset + MAX_CODES_PER_REQUEST);
    const wanted = new Set(batch);
    const requestUrl = buildProductUrl("code", batch);

    console.log(
      `Keepa: mapping ${batch.length} UPCs to ASINs (tokens left: ${tokensLeft})...`,
    );

    try {
      const response = await fetchKeepaWithRetries(
        requestUrl,
        "UPC code lookup",
      );

      console.log(
        `Keepa: ${response.products?.length ?? 0} products returned for UPC query, ${tokensLeft} tokens remaining (refill: ${refillRate}/min)`,
      );

      // requested UPC -> (ASIN -> parsed product)
      const asinsByUpc = new Map<string, Map<string, KeepaData>>();
      for (const product of response.products ?? []) {
        const asin = String(product.asin ?? "").trim();
        if (asin === "") continue;

        const parsed = parseKeepaProduct(product);
        for (const code of extractUpcsFromProduct(product)) {
          // Keepa may report codes nobody asked for; only requested ones count.
          if (!wanted.has(code)) continue;

          let bucket = asinsByUpc.get(code);
          if (bucket === undefined) {
            bucket = new Map<string, KeepaData>();
            asinsByUpc.set(code, bucket);
          }
          bucket.set(asin, parsed);
        }
      }

      for (const upc of batch) {
        outcome.set(upc, describeMatch(upc, asinsByUpc.get(upc)));
      }
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      console.warn(
        `Keepa UPC chunk failed (offset ${offset}, size ${batch.length}): ${reason}`,
      );

      for (const upc of batch) {
        outcome.set(upc, buildFailureDetail(upc, "request_failed", reason));
      }
    }
  }

  return outcome;
}
|
|
|
|
/**
 * Convenience wrapper around `lookupKeepaUpcs` that keeps only unambiguous hits.
 *
 * @param upcs Raw UPC strings.
 * @returns UPC → ASIN for every lookup whose status is `found` and whose
 *          `asin` is non-empty; all other outcomes are left out.
 */
export async function mapUpcsToAsins(
  upcs: string[],
): Promise<Map<string, string>> {
  const lookups = await lookupKeepaUpcs(upcs);

  const resolved = new Map<string, string>();
  lookups.forEach((detail, upc) => {
    if (detail.status !== "found" || !detail.asin) return;
    resolved.set(upc, detail.asin);
  });
  return resolved;
}
|
|
|
|
/**
 * Converts a raw Keepa product object into the flattened `KeepaData` record.
 *
 * Every statistic is read through `pickKeepaNumber`, so Keepa's negative
 * "not available" sentinels become `null` instead of leaking out as a price of
 * -0.01 or a rank of -1. Prices are converted from integer cents to currency
 * units.
 *
 * Fallbacks:
 * - `salesRankDrops90` is estimated as three times the 30-day figure when
 *   Keepa supplies no 90-day value.
 * - `monthlySold` falls back to the 30-day rank-drop count.
 * - The Amazon buy-box share is recomputed from `buyBoxSellerIdHistory` when
 *   no precomputed percentage is present.
 *
 * @param product Raw product object from a Keepa response.
 * @returns The parsed record; unknown values are `null`.
 */
function parseKeepaProduct(product: Record<string, any>): KeepaData {
  const stats = product.stats;
  const csv = product.csv;

  // Reads one stats entry. Accepts a plain number or a [time, value] pair.
  // NOTE(review): Keepa's documentation describes `min`/`max` entries as
  // [time, value] pairs — confirm against a live payload. Dividing such a pair
  // by 100 directly yields NaN.
  const readStat = (entry: unknown): number | null =>
    pickKeepaNumber(Array.isArray(entry) ? entry[1] : entry);

  // Same as readStat, converted from integer cents to currency units.
  const readPrice = (entry: unknown): number | null => {
    const cents = readStat(entry);
    return cents != null ? cents / 100 : null;
  };

  const salesRankDrops30 = pickKeepaNumber(
    product.salesRankDrops30,
    stats?.salesRankDrops30,
  );
  const salesRankDrops90 =
    pickKeepaNumber(product.salesRankDrops90, stats?.salesRankDrops90) ??
    (salesRankDrops30 != null ? salesRankDrops30 * 3 : null);
  const monthlySold =
    pickKeepaNumber(product.monthlySold, stats?.monthlySold) ??
    salesRankDrops30;
  const amazonIsSeller = resolveAmazonIsSeller(product, stats, csv);
  const amazonBuyboxSharePct90d =
    extractAmazonBuyboxSharePct90d(product, stats) ??
    computeAmazonBuyBoxSharePctFromHistory(
      product.buyBoxSellerIdHistory,
      90,
      new Set([AMAZON_US_SELLER_ID]),
    );

  return {
    currentPrice: extractCurrentPrice(csv),
    avgPrice90: readPrice(stats?.avg?.[0]),
    // NOTE(review): `minInInterval`/`maxInInterval` are presumably the extremes
    // inside the requested stats window — verify; `min`/`max` remain the fallback.
    minPrice90:
      readPrice(stats?.minInInterval?.[0]) ?? readPrice(stats?.min?.[0]),
    maxPrice90:
      readPrice(stats?.maxInInterval?.[0]) ?? readPrice(stats?.max?.[0]),
    salesRank: readStat(stats?.current?.[3]),
    salesRankAvg90: readStat(stats?.avg?.[3]),
    salesRankDrops30,
    salesRankDrops90,
    sellerCount: readStat(stats?.current?.[11]),
    amazonIsSeller,
    amazonBuyboxSharePct90d,
    buyBoxSeller: product.buyBoxSellerId ?? null,
    buyBoxPrice: readPrice(stats?.current?.[18]),
    monthlySold,
    categoryTree:
      product.categoryTree?.map((c: { name: string }) => c.name) ?? [],
  };
}
|
|
|
|
/**
 * Works out whether Amazon itself is selling the product, consulting several
 * signals in priority order and stopping at the first decisive one:
 *
 * 1. a boolean `product.isAmazonSeller`;
 * 2. numeric `product.availabilityAmazon` (>= 0 → yes, -1 or -2 → no);
 * 3. `stats.buyBoxIsAmazon === true` → yes;
 * 4. numeric `stats.current[0]` (> 0 → yes, -1 or -2 → no);
 * 5. a positive latest value in `csv[0]` → yes.
 *
 * @returns `true`/`false` when a signal decides, `null` when none does.
 */
function resolveAmazonIsSeller(
  product: Record<string, any>,
  stats: Record<string, any> | undefined,
  csv: number[][] | undefined,
): boolean | null {
  const explicitFlag = product.isAmazonSeller;
  if (typeof explicitFlag === "boolean") {
    return explicitFlag;
  }

  const availability = product.availabilityAmazon;
  if (typeof availability === "number") {
    if (availability >= 0) return true;
    if (availability === -1 || availability === -2) return false;
    // Any other negative code is inconclusive; keep looking.
  }

  if (stats?.buyBoxIsAmazon === true) {
    return true;
  }

  const currentAmazonPrice = stats?.current?.[0];
  if (typeof currentAmazonPrice === "number") {
    if (currentAmazonPrice > 0) return true;
    if (currentAmazonPrice === -1 || currentAmazonPrice === -2) return false;
  }

  return extractLatestPositivePrice(csv?.[0]) != null ? true : null;
}
|
|
|
|
/**
 * Looks for a precomputed 90-day Amazon buy-box share in the places it may
 * appear on the product or stats object, and returns the first candidate that
 * is a finite number within 0–100, rounded to two decimals.
 *
 * @returns The percentage, or `null` when no candidate qualifies.
 */
function extractAmazonBuyboxSharePct90d(
  product: Record<string, any>,
  stats: Record<string, any> | undefined,
): number | null {
  const perSeller = product.buyBoxStats;
  const candidates: unknown[] = [
    product.buyBoxStatsAmazon90,
    stats?.buyBoxStatsAmazon90,
    perSeller?.amazon90,
    perSeller?.amazon?.[90],
    perSeller?.amazon?.["90"],
    perSeller?.[AMAZON_US_SELLER_ID]?.[90],
    perSeller?.[AMAZON_US_SELLER_ID]?.["90"],
  ];

  const share = candidates.find(
    (candidate): candidate is number =>
      typeof candidate === "number" &&
      Number.isFinite(candidate) &&
      candidate >= 0 &&
      candidate <= 100,
  );

  return share === undefined ? null : Math.round(share * 100) / 100;
}
|
|
|
|
/**
 * Derives Amazon's share of buy-box time from a flat history array.
 *
 * The history is read as consecutive `[startMinute, sellerId]` pairs in Keepa
 * minutes; each pair lasts until the next pair's start, and the final pair
 * lasts until now. Intervals are clipped to the last `windowDays` days.
 * Intervals whose seller id is "-1" or "-2" count neither for nor against
 * Amazon.
 *
 * @param history Flat history array; anything that is not an array of at
 *                least two elements yields `null`.
 * @param windowDays Length of the look-back window in days.
 * @param amazonSellerIds Upper-case seller ids that count as Amazon.
 * @returns Amazon's share as a percentage rounded to two decimals, or `null`
 *          when no minutes qualify.
 */
function computeAmazonBuyBoxSharePctFromHistory(
  history: unknown,
  windowDays: number,
  amazonSellerIds: Set<string>,
): number | null {
  if (!Array.isArray(history) || history.length < 2) return null;

  const toMinute = (raw: unknown): number => Number.parseInt(String(raw), 10);

  const windowEnd = Math.floor(Date.now() / 60_000) - KEEPA_MINUTES_OFFSET;
  const windowBegin = windowEnd - windowDays * 24 * 60;

  let countedMinutes = 0;
  let amazonHeldMinutes = 0;

  for (let idx = 0; idx + 1 < history.length; idx += 2) {
    const from = toMinute(history[idx]);
    const holder = String(history[idx + 1] ?? "").toUpperCase();
    const hasSuccessor = idx + 2 < history.length;
    const until = toMinute(hasSuccessor ? history[idx + 2] : windowEnd);

    // Skip unparsable timestamps and empty or reversed intervals.
    if (!(Number.isFinite(from) && Number.isFinite(until))) continue;
    if (until <= from) continue;

    const clippedFrom = Math.max(from, windowBegin);
    const clippedUntil = Math.min(until, windowEnd);
    const held = clippedUntil - clippedFrom;
    if (held <= 0) continue;

    if (holder === "-1" || holder === "-2") continue;

    countedMinutes += held;
    if (amazonSellerIds.has(holder)) {
      amazonHeldMinutes += held;
    }
  }

  if (countedMinutes === 0) return null;
  return Math.round((amazonHeldMinutes / countedMinutes) * 10_000) / 100;
}
|
|
|
|
/**
 * Reads the final element of a series and, when it is a positive finite
 * number, returns it divided by 100.
 *
 * @param series Candidate series; must be an array with at least two elements.
 * @returns The converted value, or `null` when the series or its last element
 *          does not qualify.
 */
function extractLatestPositivePrice(series: unknown): number | null {
  if (!Array.isArray(series)) return null;
  if (series.length < 2) return null;

  const newest: unknown = series[series.length - 1];
  return typeof newest === "number" && Number.isFinite(newest) && newest > 0
    ? newest / 100
    : null;
}
|
|
|
|
/**
 * Returns the first argument that is a finite, non-negative number.
 * Negative values are skipped because Keepa often uses -1 as "not available".
 *
 * @param values Candidates in priority order.
 * @returns The first usable number, or `null` when there is none.
 */
function pickKeepaNumber(...values: unknown[]): number | null {
  const firstUsable = values.find(
    (candidate): candidate is number =>
      typeof candidate === "number" &&
      Number.isFinite(candidate) &&
      candidate >= 0,
  );
  return firstUsable === undefined ? null : firstUsable;
}
|
|
|
|
/**
 * Picks the most recent price from Keepa's `csv` history arrays.
 *
 * `csv[0]` (Amazon price history) is tried first, then `csv[1]` (marketplace
 * new price history). Each series is laid out as `[time, price, time, price,
 * ...]`, so its last element is the latest price; the first series whose
 * latest price is positive wins.
 *
 * @param csv Keepa `csv` array, if present.
 * @returns The price divided by 100, or `null` when neither series has a
 *          positive latest value.
 */
function extractCurrentPrice(csv: number[][] | undefined): number | null {
  if (!csv) return null;

  for (const seriesIndex of [0, 1]) {
    const series = csv[seriesIndex];
    if (!series || series.length < 2) continue;

    const newest = series[series.length - 1]!;
    if (newest > 0) {
      return newest / 100;
    }
  }

  return null;
}
|