Files
asin-check/src/integrations/keepa.ts

581 lines
17 KiB
TypeScript

import { config } from "../config.ts";
import { normalizeAsin } from "../asin.ts";
import type { KeepaData, KeepaUpcLookupDetail } from "../types.ts";
// Base URL that every Keepa request URL built by this module starts with.
const KEEPA_BASE = "https://api.keepa.com";
// Chunk size used when an ASIN list is split into separate requests.
const MAX_ASINS_PER_REQUEST = 100;
// Chunk size for UPC/code lookups; aliases the ASIN chunk size.
const MAX_CODES_PER_REQUEST = MAX_ASINS_PER_REQUEST;
// Upper bound on attempts (the first try included) made by fetchKeepaWithRetries.
const MAX_KEEPA_RETRIES = 4;
// Extra milliseconds added on top of every computed retry delay.
const KEEP_RETRY_BUFFER_MS = 250;
// Seller ID counted as "Amazon" when the buy box share is derived from history.
const AMAZON_US_SELLER_ID = "ATVPDKIKX0DER";
// Offset subtracted from Unix-epoch minutes to obtain the "Keepa minutes" used
// by computeAmazonBuyBoxSharePctFromHistory.
const KEEPA_MINUTES_OFFSET = 21_564_000;
// A code is accepted as a UPC when it consists of exactly 12 to 14 digits.
const UPC_PATTERN = /^\d{12,14}$/;
// Subset of a Keepa JSON response body that this module reads.
// The same shape is used for successful responses and for parsed error
// payloads (see parseErrorPayload), so every field is optional.
type KeepaApiResponse = {
// Raw product objects; their fields are read by parseKeepaProduct.
products?: Record<string, any>[];
// Token balance reported by Keepa; copied into module state by updateTokenState.
tokensLeft?: number;
// Refill rate reported by Keepa; this module treats it as tokens per minute.
refillRate?: number;
// Refill delay reported by Keepa; computeWaitMsFromRefill uses it as milliseconds.
refillIn?: number;
};
// Token-based rate limiting based on Keepa's tokensLeft/refillRate response fields.
// Actual token cost can be greater than 1 depending on endpoint parameters and payload.
// The client keeps request pace using tokensLeft/refillRate/refillIn to avoid 429 bursts.
// The three variables below are module-level state shared by every request
// issued through fetchKeepaWithRetries.
let tokensLeft = 1; // Conservative start; updated from API response
let refillRate = 1; // tokens per minute, updated from API response
// Date.now() timestamp (ms) taken right after the most recent fetch settles;
// stays 0 until the first request has been made.
let lastRequestTime = 0;
/**
 * Blocks until the locally tracked Keepa token balance is positive again.
 *
 * Returns immediately while `tokensLeft` is above zero. Otherwise the balance
 * is credited with the tokens that should have regenerated since
 * `lastRequestTime`; if that still leaves it at or below zero, the function
 * sleeps until the whole deficit has been refilled and then sets the balance
 * to 1.
 *
 * The balance copied from Keepa responses may be negative, so regenerating a
 * single token is not always enough to make the next request succeed.
 *
 * @returns A promise that settles once a request may be attempted.
 */
async function waitForToken(): Promise<void> {
  if (tokensLeft > 0) return;

  // A zero, negative or non-finite refill rate would turn the delay below into
  // Infinity/NaN; fall back to one token per minute in that case.
  const safeRefillRate =
    Number.isFinite(refillRate) && refillRate > 0 ? refillRate : 1;
  const elapsedMs = Math.max(0, Date.now() - lastRequestTime);
  const regenerated = Math.floor((elapsedMs / 60_000) * safeRefillRate);

  // Only proceed when the regenerated tokens actually lift the balance above
  // zero; a deep deficit must not be treated as cleared by a single token.
  if (tokensLeft + regenerated > 0) {
    tokensLeft += regenerated;
    return;
  }

  // Tokens that must regenerate (counted from the last request) to reach 1.
  const deficit = 1 - tokensLeft;
  const waitMs = Math.ceil((deficit / safeRefillRate) * 60_000) - elapsedMs;
  if (waitMs > 0) {
    console.log(
      `Keepa tokens exhausted. Waiting ${Math.ceil(waitMs / 1000)}s for token regeneration...`,
    );
    await new Promise<void>((resolve) => setTimeout(resolve, waitMs));
  }
  tokensLeft = 1;
}
/**
 * Creates a promise that resolves once a `setTimeout` of `ms` milliseconds fires.
 *
 * @param ms Delay handed to `setTimeout`.
 * @returns A promise that resolves with no value.
 */
function wait(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
/**
 * Builds the URL for a Keepa `/product` request.
 *
 * Query parameters are added in this order: `key`, `domain`, optional
 * `stats` + `days`, optional `buybox`, optional `history=0`, and finally the
 * comma-joined identifiers under `queryParam`.
 *
 * @param queryParam Name of the identifier parameter (`asin` or `code`).
 * @param values Identifiers to request; joined with commas.
 * @param options Optional switches. `includeStats`, `includeBuybox` and
 *   `includeHistory` default to `true`; `days` defaults to 90 and is only
 *   sent when stats are included.
 * @returns The complete request URL including the API key from `config`.
 */
function buildProductUrl(
  queryParam: "asin" | "code",
  values: string[],
  options?: {
    includeStats?: boolean;
    includeBuybox?: boolean;
    includeHistory?: boolean;
    days?: number;
  },
): string {
  const statsWanted = options?.includeStats ?? true;
  const buyboxWanted = options?.includeBuybox ?? true;
  const historyWanted = options?.includeHistory ?? true;
  const windowDays = options?.days ?? 90;

  const query = new URLSearchParams();
  query.set("key", config.keepaApiKey);
  query.set("domain", "1");

  if (statsWanted) {
    // The same window is sent for both the stats and the days parameter.
    const windowText = String(windowDays);
    query.set("stats", windowText);
    query.set("days", windowText);
  }
  if (buyboxWanted) query.set("buybox", "1");
  // History is only mentioned in the URL when it has to be switched off.
  if (!historyWanted) query.set("history", "0");

  query.set(queryParam, values.join(","));
  return KEEPA_BASE + "/product?" + query.toString();
}
/**
 * Copies the rate-limit fields of a Keepa response into module state.
 *
 * A field that is `null` or `undefined` leaves the matching variable unchanged.
 *
 * @param data Parsed Keepa response (successful or error payload).
 */
function updateTokenState(data: KeepaApiResponse): void {
  const { tokensLeft: reportedTokens, refillRate: reportedRate } = data;
  if (reportedTokens !== null && reportedTokens !== undefined) {
    tokensLeft = reportedTokens;
  }
  if (reportedRate !== null && reportedRate !== undefined) {
    refillRate = reportedRate;
  }
}
/**
 * Computes how long to pause before retrying a throttled request.
 *
 * @param refillIn Refill hint from the error payload, used as milliseconds.
 *   Ignored unless it is a finite, non-negative number.
 * @returns The rounded-up hint plus `KEEP_RETRY_BUFFER_MS`, or, without a
 *   usable hint, the time one token takes at the current refill rate (the
 *   rate is clamped to at least 1 per minute) plus the same buffer.
 */
function computeWaitMsFromRefill(refillIn?: number): number {
  if (
    typeof refillIn !== "number" ||
    !Number.isFinite(refillIn) ||
    refillIn < 0
  ) {
    // No usable hint: estimate from the refill rate instead.
    const tokensPerMinute = Math.max(1, refillRate);
    return Math.ceil((1 / tokensPerMinute) * 60_000) + KEEP_RETRY_BUFFER_MS;
  }

  const hinted = Math.ceil(refillIn) + KEEP_RETRY_BUFFER_MS;
  return Math.max(KEEP_RETRY_BUFFER_MS, hinted);
}
/**
 * Attempts to interpret an error response body as JSON.
 *
 * @param text Raw response body.
 * @returns The parsed value when it is a non-null object (arrays included),
 *   otherwise `null`. Invalid JSON also yields `null` instead of throwing.
 */
function parseErrorPayload(text: string): KeepaApiResponse | null {
  let decoded: unknown;
  try {
    decoded = JSON.parse(text);
  } catch {
    return null;
  }
  if (decoded === null || typeof decoded !== "object") return null;
  return decoded as KeepaApiResponse;
}
/**
 * Performs a GET request against Keepa, retrying only when throttled.
 *
 * Every attempt first waits for a token, then fetches `url` and records the
 * request time. A successful response is parsed as JSON, used to refresh the
 * token state and returned. For a failed response the body is read as text
 * and, when it parses as JSON, also used to refresh the token state.
 *
 * Only HTTP 429 is retried, up to `MAX_KEEPA_RETRIES` attempts in total; any
 * other status ends the loop at once.
 *
 * @param url Fully built request URL.
 * @param operationLabel Human-readable label used in the throttling warning.
 * @returns The parsed response body of the first successful attempt.
 * @throws Error carrying the status and body of the last failed response.
 *   Exceptions raised by `fetch` or by JSON parsing are not caught here.
 */
async function fetchKeepaWithRetries(
  url: string,
  operationLabel: string,
): Promise<KeepaApiResponse> {
  let failureMessage = "Unknown Keepa error";
  let attempt = 0;

  while (attempt < MAX_KEEPA_RETRIES) {
    attempt += 1;

    await waitForToken();
    const response = await fetch(url);
    lastRequestTime = Date.now();

    if (response.ok) {
      const body = (await response.json()) as KeepaApiResponse;
      updateTokenState(body);
      return body;
    }

    const rawBody = await response.text();
    const errorPayload = parseErrorPayload(rawBody);
    if (errorPayload) updateTokenState(errorPayload);
    failureMessage = `Keepa API error ${response.status}: ${rawBody}`;

    const throttled = response.status === 429;
    const attemptsRemain = attempt < MAX_KEEPA_RETRIES;
    if (!(throttled && attemptsRemain)) break;

    const pauseMs = computeWaitMsFromRefill(errorPayload?.refillIn);
    // Never keep a positive balance after being throttled, so the next
    // waitForToken call does not return immediately.
    tokensLeft = Math.min(tokensLeft, 0);
    console.warn(
      `Keepa throttled during ${operationLabel} (attempt ${attempt}/${MAX_KEEPA_RETRIES}). Waiting ${Math.ceil(pauseMs / 1000)}s before retry...`,
    );
    await wait(pauseMs);
  }

  throw new Error(failureMessage);
}
/**
 * Normalizes a caller-supplied UPC by stripping surrounding whitespace.
 * Nothing else is changed; in particular non-digit characters are kept.
 *
 * @param input Raw UPC text.
 * @returns The trimmed text.
 */
function normalizeUpc(input: string): string {
  const trimmed = input.trim();
  return trimmed;
}
/**
 * Tells whether `value` matches `UPC_PATTERN` as a whole.
 *
 * @param value Candidate code, expected to be already normalized.
 * @returns `true` when the pattern matches, `false` otherwise.
 */
function isValidUpc(value: string): boolean {
  return UPC_PATTERN.exec(value) !== null;
}
/**
 * Reduces a code taken from a Keepa product to its ASCII digits.
 *
 * @param value Code text as found in the product data.
 * @returns `value` with every non-digit character removed.
 */
function normalizeCodeFromKeepa(value: string): string {
  const digitsOnly = value.replace(/\D/g, "");
  return digitsOnly;
}
/**
 * Recursively gathers every valid UPC found in `value` into `target`.
 *
 * - Arrays are walked element by element, at any nesting depth.
 * - Finite numbers are truncated and stringified before validation.
 * - Strings are split on whitespace, commas, semicolons and pipes; each
 *   non-empty part is validated on its own.
 * - Every other kind of value is ignored.
 *
 * NOTE(review): a code stored as a number cannot keep leading zeros, so such
 * a value may fail validation — confirm whether Keepa ever sends numbers here.
 *
 * @param value Field value of unknown shape taken from a Keepa product.
 * @param target Set that receives the normalized, validated codes.
 */
function collectCodes(value: unknown, target: Set<string>): void {
  const addIfValid = (raw: string): void => {
    const digits = normalizeCodeFromKeepa(raw);
    if (isValidUpc(digits)) target.add(digits);
  };

  if (Array.isArray(value)) {
    value.forEach((entry) => collectCodes(entry, target));
    return;
  }

  switch (typeof value) {
    case "number":
      if (Number.isFinite(value)) addIfValid(String(Math.trunc(value)));
      return;
    case "string":
      value
        .split(/[\s,;|]+/)
        .filter((part) => part.length > 0)
        .forEach((part) => addIfValid(part));
      return;
    default:
      return;
  }
}
/**
 * Collects the distinct valid codes carried by a Keepa product.
 *
 * The fields `upcList`, `upc`, `eanList`, `ean`, `gtinList` and `gtin` are
 * inspected in that order; missing fields contribute nothing.
 *
 * @param product Raw Keepa product object.
 * @returns Unique codes in first-seen order.
 */
function extractUpcsFromProduct(product: Record<string, any>): string[] {
  const codeFields = [
    "upcList",
    "upc",
    "eanList",
    "ean",
    "gtinList",
    "gtin",
  ] as const;

  const found = new Set<string>();
  for (const field of codeFields) {
    collectCodes(product[field], found);
  }
  return [...found];
}
/**
 * Creates the lookup detail recorded for a UPC that did not resolve to
 * exactly one ASIN.
 *
 * @param upc Code stored as both `requestedUpc` and `normalizedUpc`.
 * @param status Failure category.
 * @param reason Human-readable explanation.
 * @param candidateAsins ASINs that matched, if any; defaults to an empty list.
 * @returns A detail whose `asin` and `keepaData` are `null`.
 */
function buildFailureDetail(
  upc: string,
  status: "invalid_upc" | "not_found" | "multiple_asins" | "request_failed",
  reason: string,
  candidateAsins: string[] = [],
): KeepaUpcLookupDetail {
  const failure: KeepaUpcLookupDetail = {
    requestedUpc: upc,
    normalizedUpc: upc,
    status,
    asin: null,
    candidateAsins,
    keepaData: null,
    reason,
  };
  return failure;
}
/**
 * Fetches 90-day statistics for a list of ASINs.
 *
 * Inputs are normalized and de-duplicated, then requested in chunks of
 * `MAX_ASINS_PER_REQUEST`, one chunk at a time. Each request asks for stats
 * only (no buy box data, no history).
 *
 * @param asins ASINs in any form accepted by `normalizeAsin`; entries that
 *   normalize to `null` are dropped.
 * @returns Parsed data keyed by normalized ASIN. ASINs for which Keepa
 *   returned no product are absent from the map.
 * @throws Whatever `fetchKeepaWithRetries` throws. A failing chunk aborts the
 *   whole call, so results of earlier chunks are not returned.
 */
export async function fetchKeepaDataBatch(
  asins: string[],
): Promise<Map<string, KeepaData>> {
  const uniqueAsins = new Set<string>();
  for (const rawAsin of asins) {
    const canonical = normalizeAsin(rawAsin);
    if (canonical !== null) uniqueAsins.add(canonical);
  }
  const queue = [...uniqueAsins];

  const results = new Map<string, KeepaData>();
  for (
    let offset = 0;
    offset < queue.length;
    offset += MAX_ASINS_PER_REQUEST
  ) {
    const batch = queue.slice(offset, offset + MAX_ASINS_PER_REQUEST);
    const url = buildProductUrl("asin", batch, {
      includeStats: true,
      includeBuybox: false,
      includeHistory: false,
      days: 90,
    });

    console.log(
      `Keepa: fetching ${batch.length} ASINs (tokens left: ${tokensLeft})...`,
    );
    const response = await fetchKeepaWithRetries(url, "ASIN batch fetch");
    console.log(
      `Keepa: ${response.products?.length ?? 0} products returned, ${tokensLeft} tokens remaining (refill: ${refillRate}/min)`,
    );

    if (!response.products) continue;
    for (const product of response.products) {
      const asin = normalizeAsin(product.asin);
      if (asin) results.set(asin, parseKeepaProduct(product));
    }
  }
  return results;
}
/**
 * Resolves UPCs to ASINs through Keepa's product-code lookup.
 *
 * Every input is trimmed first. Codes that fail validation are reported as
 * `invalid_upc` without any request being made. Valid codes are de-duplicated
 * and looked up in chunks of `MAX_CODES_PER_REQUEST`, with stats, buy box
 * data and history all switched off.
 *
 * A returned product is attributed to a requested code only when that exact
 * code appears among the product's own codes. Per code the outcome is:
 * - `not_found` when no product matched,
 * - `multiple_asins` when more than one ASIN matched (all are listed),
 * - `found` when exactly one ASIN matched,
 * - `request_failed` for every code of a chunk whose processing threw.
 *
 * @param upcs Raw UPC strings.
 * @returns Lookup details keyed by the trimmed code. This function does not
 *   reject because of a failed chunk; such failures are reported per code.
 */
export async function lookupKeepaUpcs(
  upcs: string[],
): Promise<Map<string, KeepaUpcLookupDetail>> {
  const details = new Map<string, KeepaUpcLookupDetail>();

  // Pass 1: report invalid codes right away, queue each valid code once.
  const uniqueValid = new Set<string>();
  for (const rawUpc of upcs) {
    const candidate = normalizeUpc(rawUpc);
    if (isValidUpc(candidate)) {
      uniqueValid.add(candidate);
      continue;
    }
    if (details.has(candidate)) continue;
    details.set(
      candidate,
      buildFailureDetail(
        candidate,
        "invalid_upc",
        "UPC must be 12, 13, or 14 digits",
      ),
    );
  }
  const validUpcs = [...uniqueValid];

  // Pass 2: look the valid codes up chunk by chunk.
  for (
    let offset = 0;
    offset < validUpcs.length;
    offset += MAX_CODES_PER_REQUEST
  ) {
    const batch = validUpcs.slice(offset, offset + MAX_CODES_PER_REQUEST);
    const requested = new Set(batch);
    const url = buildProductUrl("code", batch, {
      includeStats: false,
      includeBuybox: false,
      includeHistory: false,
    });
    console.log(
      `Keepa: mapping ${batch.length} UPCs to ASINs (tokens left: ${tokensLeft})...`,
    );

    try {
      const response = await fetchKeepaWithRetries(url, "UPC code lookup");
      console.log(
        `Keepa: ${response.products?.length ?? 0} products returned for UPC query, ${tokensLeft} tokens remaining (refill: ${refillRate}/min)`,
      );

      // requested code -> (ASIN -> parsed product data)
      const asinsByUpc = new Map<string, Map<string, KeepaData>>();
      for (const product of response.products ?? []) {
        const asin = normalizeAsin(product.asin);
        if (!asin) continue;
        const keepaData = parseKeepaProduct(product);
        for (const upc of extractUpcsFromProduct(product)) {
          if (!requested.has(upc)) continue;
          let perAsin = asinsByUpc.get(upc);
          if (perAsin === undefined) {
            perAsin = new Map<string, KeepaData>();
            asinsByUpc.set(upc, perAsin);
          }
          perAsin.set(asin, keepaData);
        }
      }

      for (const upc of batch) {
        const perAsin = asinsByUpc.get(upc);
        const candidateAsins = perAsin ? Array.from(perAsin.keys()) : [];

        if (candidateAsins.length === 0) {
          details.set(
            upc,
            buildFailureDetail(
              upc,
              "not_found",
              "No Keepa product matched this UPC",
            ),
          );
          continue;
        }
        if (candidateAsins.length > 1) {
          details.set(
            upc,
            buildFailureDetail(
              upc,
              "multiple_asins",
              `UPC matched multiple ASINs (${candidateAsins.length})`,
              candidateAsins,
            ),
          );
          continue;
        }

        const asin = candidateAsins[0]!;
        details.set(upc, {
          requestedUpc: upc,
          normalizedUpc: upc,
          status: "found",
          asin,
          candidateAsins: [asin],
          keepaData: perAsin?.get(asin) ?? null,
        });
      }
    } catch (error) {
      // Any failure inside the chunk marks all of its codes as failed,
      // overwriting details already written for this chunk.
      const reason = error instanceof Error ? error.message : String(error);
      console.warn(
        `Keepa UPC chunk failed (offset ${offset}, size ${batch.length}): ${reason}`,
      );
      for (const upc of batch) {
        details.set(upc, buildFailureDetail(upc, "request_failed", reason));
      }
    }
  }

  return details;
}
/**
 * Convenience wrapper around `lookupKeepaUpcs` that keeps only unambiguous
 * matches.
 *
 * @param upcs Raw UPC strings.
 * @returns Map from code (as keyed by `lookupKeepaUpcs`) to ASIN, containing
 *   only entries whose status is `found` and whose ASIN is non-empty.
 */
export async function mapUpcsToAsins(
  upcs: string[],
): Promise<Map<string, string>> {
  const resolved = new Map<string, string>();
  const details = await lookupKeepaUpcs(upcs);
  details.forEach((detail, upc) => {
    if (detail.status === "found" && detail.asin) {
      resolved.set(upc, detail.asin);
    }
  });
  return resolved;
}
/**
 * Flattens one raw Keepa product object into the `KeepaData` summary.
 *
 * Prices are converted from integer cents to currency units. Any statistic
 * that Keepa reports as a negative number (its "not available" marker) is
 * returned as `null` instead of leaking into the result as a negative price,
 * rank or count.
 *
 * Field sources:
 * - `currentPrice`: latest positive price from the `csv` history (Amazon
 *   first, then marketplace new). When the history is absent it falls back
 *   to `stats.current[0]`, then `stats.current[1]`.
 * - `avgPrice90`: `stats.avg[0]`.
 * - `minPrice90` / `maxPrice90`: `stats.minInInterval[0]` /
 *   `stats.maxInInterval[0]` when those arrays exist, otherwise
 *   `stats.min[0]` / `stats.max[0]`. A slot may be a plain number or a
 *   `[keepaMinute, value]` pair; both are accepted.
 * - `salesRankDrops90`: falls back to three times the 30-day value.
 * - `monthlySold`: falls back to the 30-day sales rank drops.
 *
 * @param product Raw product object from a Keepa response.
 * @returns The parsed summary; fields without usable data are `null`
 *   (`categoryTree` is an empty array).
 */
function parseKeepaProduct(product: Record<string, any>): KeepaData {
  const stats = product.stats;
  const csv = product.csv;

  // Reads one stats slot: unwraps [time, value] pairs and maps negative or
  // non-numeric values to null.
  const statValue = (slot: unknown): number | null =>
    pickKeepaNumber(Array.isArray(slot) ? slot[1] : slot);
  // Same as statValue, converted from cents to currency units.
  const statPrice = (slot: unknown): number | null => {
    const cents = statValue(slot);
    return cents === null ? null : cents / 100;
  };
  // Current-price fallback: only strictly positive prices count, matching
  // what the csv-based extraction accepts.
  const positiveStatPrice = (slot: unknown): number | null => {
    const price = statPrice(slot);
    return price !== null && price > 0 ? price : null;
  };

  const salesRankDrops30 = pickKeepaNumber(
    product.salesRankDrops30,
    stats?.salesRankDrops30,
  );
  const salesRankDrops90 =
    pickKeepaNumber(product.salesRankDrops90, stats?.salesRankDrops90) ??
    (salesRankDrops30 != null ? salesRankDrops30 * 3 : null);
  const monthlySold =
    pickKeepaNumber(product.monthlySold, stats?.monthlySold) ??
    salesRankDrops30;

  const amazonIsSeller = resolveAmazonIsSeller(product, stats, csv);
  const amazonBuyboxSharePct90d =
    extractAmazonBuyboxSharePct90d(product, stats) ??
    computeAmazonBuyBoxSharePctFromHistory(
      product.buyBoxSellerIdHistory,
      90,
      new Set([AMAZON_US_SELLER_ID]),
    );

  // Prefer the extremes of the requested stats interval; stats.min/max are
  // only used when the interval arrays are missing altogether.
  const minSlots = Array.isArray(stats?.minInInterval)
    ? stats.minInInterval
    : stats?.min;
  const maxSlots = Array.isArray(stats?.maxInInterval)
    ? stats.maxInInterval
    : stats?.max;

  // Without price history the csv lookup yields nothing, so fall back to the
  // current values carried by the stats object.
  const currentPrice =
    extractCurrentPrice(csv) ??
    positiveStatPrice(stats?.current?.[0]) ??
    positiveStatPrice(stats?.current?.[1]);

  // Tolerate a missing or malformed category tree instead of throwing.
  const categoryTree: string[] = Array.isArray(product.categoryTree)
    ? product.categoryTree
        .map((category: { name?: unknown } | null | undefined) => category?.name)
        .filter((name: unknown): name is string => typeof name === "string")
    : [];

  return {
    currentPrice,
    avgPrice90: statPrice(stats?.avg?.[0]),
    minPrice90: statPrice(minSlots?.[0]),
    maxPrice90: statPrice(maxSlots?.[0]),
    salesRank: statValue(stats?.current?.[3]),
    salesRankAvg90: statValue(stats?.avg?.[3]),
    salesRankDrops30,
    salesRankDrops90,
    sellerCount: statValue(stats?.current?.[11]),
    amazonIsSeller,
    amazonBuyboxSharePct90d,
    buyBoxSeller: product.buyBoxSellerId ?? null,
    buyBoxPrice: statPrice(stats?.current?.[18]),
    buyBoxAvg90: statPrice(stats?.avg?.[18]),
    monthlySold,
    categoryTree,
  };
}
/**
 * Decides whether Amazon itself sells the product, trying several signals in
 * order and stopping at the first conclusive one:
 *
 * 1. `product.isAmazonSeller` when it is a boolean.
 * 2. `product.availabilityAmazon`: `>= 0` means yes, `-1` / `-2` mean no.
 * 3. `stats.buyBoxIsAmazon === true` means yes.
 * 4. `stats.current[0]`: `> 0` means yes, `-1` / `-2` mean no.
 * 5. Any positive price in the `csv[0]` series means yes.
 *
 * NOTE(review): step 5 accepts the latest positive price even when newer
 * entries in the series are not positive — confirm this is intended.
 *
 * @param product Raw Keepa product object.
 * @param stats The product's stats object, if present.
 * @param csv The product's price history series, if present.
 * @returns `true`, `false`, or `null` when no signal is conclusive.
 */
function resolveAmazonIsSeller(
  product: Record<string, any>,
  stats: Record<string, any> | undefined,
  csv: number[][] | undefined,
): boolean | null {
  const explicitFlag = product.isAmazonSeller;
  if (typeof explicitFlag === "boolean") return explicitFlag;

  const meansNoOffer = (marker: number): boolean =>
    marker === -1 || marker === -2;

  const availability = product.availabilityAmazon;
  if (typeof availability === "number") {
    if (availability >= 0) return true;
    if (meansNoOffer(availability)) return false;
  }

  if (stats?.buyBoxIsAmazon === true) return true;

  const currentAmazonPrice = stats?.current?.[0];
  if (typeof currentAmazonPrice === "number") {
    if (currentAmazonPrice > 0) return true;
    if (meansNoOffer(currentAmazonPrice)) return false;
  }

  return extractLatestPositivePrice(csv?.[0]) != null ? true : null;
}
/**
 * Looks for a ready-made 90-day Amazon buy box share in the product data.
 *
 * Several possible locations are probed in a fixed order; the first value
 * that is a finite number between 0 and 100 (inclusive) wins.
 *
 * NOTE(review): the probed field names are taken as-is from this module;
 * which of them Keepa actually populates is not verifiable from here.
 *
 * @param product Raw Keepa product object.
 * @param stats The product's stats object, if present.
 * @returns The share rounded to two decimals, or `null` when no location
 *   holds a usable value.
 */
function extractAmazonBuyboxSharePct90d(
  product: Record<string, any>,
  stats: Record<string, any> | undefined,
): number | null {
  const perSeller = product.buyBoxStats;
  const readings: unknown[] = [
    product.buyBoxStatsAmazon90,
    stats?.buyBoxStatsAmazon90,
    perSeller?.amazon90,
    perSeller?.amazon?.[90],
    perSeller?.amazon?.["90"],
    perSeller?.[AMAZON_US_SELLER_ID]?.[90],
    perSeller?.[AMAZON_US_SELLER_ID]?.["90"],
  ];

  const share = readings.find(
    (reading): reading is number =>
      typeof reading === "number" &&
      Number.isFinite(reading) &&
      reading >= 0 &&
      reading <= 100,
  );
  return share === undefined ? null : Math.round(share * 100) / 100;
}
/**
 * Derives Amazon's buy box share from a flat seller history.
 *
 * `history` is read as alternating `[time, sellerId, time, sellerId, ...]`
 * entries, with times in Keepa minutes. Each seller holds the buy box from
 * its own time until the next entry's time, or until now for the last entry.
 * Intervals are clipped to the last `windowDays` days. Intervals whose seller
 * is `"-1"` or `"-2"` are left out of both numerator and denominator.
 *
 * @param history Raw history value; anything but an array with at least two
 *   entries yields `null`.
 * @param windowDays Length of the look-back window in days.
 * @param amazonSellerIds Upper-case seller IDs counted as Amazon.
 * @returns Percentage (two decimals) of the counted minutes held by Amazon,
 *   or `null` when no minutes were counted.
 */
function computeAmazonBuyBoxSharePctFromHistory(
  history: unknown,
  windowDays: number,
  amazonSellerIds: Set<string>,
): number | null {
  if (!Array.isArray(history) || history.length < 2) return null;

  const nowMinute = Math.floor(Date.now() / 60_000) - KEEPA_MINUTES_OFFSET;
  const windowOpens = nowMinute - windowDays * 24 * 60;
  const toMinute = (raw: unknown): number =>
    Number.parseInt(String(raw), 10);

  let countedMinutes = 0;
  let amazonMinutes = 0;

  for (let idx = 0; idx + 1 < history.length; idx += 2) {
    const heldFrom = toMinute(history[idx]);
    const holder = String(history[idx + 1] ?? "").toUpperCase();
    // The next entry's timestamp closes this interval; the last one runs to now.
    const hasNext = idx + 2 < history.length;
    const heldUntil = toMinute(hasNext ? history[idx + 2] : nowMinute);

    if (!Number.isFinite(heldFrom) || !Number.isFinite(heldUntil)) continue;
    if (heldUntil <= heldFrom) continue;

    const clippedStart = Math.max(heldFrom, windowOpens);
    const clippedEnd = Math.min(heldUntil, nowMinute);
    if (clippedEnd <= clippedStart) continue;

    // Placeholder seller IDs are excluded from the totals.
    if (holder === "-1" || holder === "-2") continue;

    const span = clippedEnd - clippedStart;
    countedMinutes += span;
    if (amazonSellerIds.has(holder)) amazonMinutes += span;
  }

  if (countedMinutes === 0) return null;
  return Math.round((amazonMinutes / countedMinutes) * 10_000) / 100;
}
/**
 * Finds the most recent positive price in a flat `[time, price, ...]` series.
 *
 * Only odd indexes are treated as prices. They are scanned from the end of
 * the series towards the start.
 *
 * @param series Raw series value; anything but an array with at least two
 *   entries yields `null`.
 * @returns The first finite, strictly positive price found, divided by 100,
 *   or `null` when there is none.
 */
function extractLatestPositivePrice(series: unknown): number | null {
  if (!Array.isArray(series) || series.length < 2) return null;

  const lastIndex = series.length - 1;
  // Step back one slot when the series ends on a timestamp (even index).
  const lastPriceIndex = lastIndex % 2 === 1 ? lastIndex : lastIndex - 1;

  for (let idx = lastPriceIndex; idx >= 1; idx -= 2) {
    const cents = series[idx];
    if (typeof cents === "number" && Number.isFinite(cents) && cents > 0) {
      return cents / 100;
    }
  }
  return null;
}
/**
 * Returns the first argument that is a usable Keepa number.
 *
 * A value is usable when it is a finite number that is not negative; Keepa
 * often uses -1 as "not available".
 *
 * @param values Candidates in order of preference.
 * @returns The first usable value, or `null` when none qualifies.
 */
function pickKeepaNumber(...values: unknown[]): number | null {
  const firstUsable = values.find(
    (candidate): candidate is number =>
      typeof candidate === "number" &&
      Number.isFinite(candidate) &&
      candidate >= 0,
  );
  return firstUsable ?? null;
}
/**
 * Picks a current price from the product's price history.
 *
 * csv[0] = Amazon price history, csv[1] = Marketplace new price history.
 * Each is [time, price, time, price, ...]. The Amazon series is consulted
 * first; the marketplace series is used only when the Amazon series holds
 * no positive price at all.
 *
 * NOTE(review): "current" here means the latest positive price in a series,
 * which may be older than the series' last entry — confirm this is intended.
 *
 * @param csv Price history series, if present.
 * @returns The chosen price in currency units, or `null` when `csv` is
 *   missing or neither series holds a positive price.
 */
function extractCurrentPrice(csv: number[][] | undefined): number | null {
  if (!csv) return null;
  const amazonPrice = extractLatestPositivePrice(csv[0]);
  if (amazonPrice != null) return amazonPrice;
  return extractLatestPositivePrice(csv[1]);
}