feat: add UPC to ASIN mapping and large file UPC analysis

Introduces the capability to resolve UPCs to ASINs using the Keepa API. This includes a new `upc-file` command for processing large Excel files of UPCs, a `upc` CLI tool for quick lookups, and API endpoints for web-based integration. The analysis pipeline was refactored into a reusable module to support both standard ASIN leads and new UPC-driven workflows.
This commit is contained in:
Victor Noguera
2026-04-16 23:06:55 -04:00
parent d25cf5d5ec
commit 32e7b0c485
14 changed files with 2278 additions and 250 deletions

View File

@@ -1,9 +1,19 @@
import index from "./web/index.html";
import { getDb, initDb } from "./database.ts";
import { fetchKeepaDataBatch } from "./keepa.ts";
import {
fetchKeepaDataBatch,
lookupKeepaUpcs,
mapUpcsToAsins,
} from "./keepa.ts";
import { runUpcFileAnalysis } from "./upc-file-analysis.ts";
import { fetchSellabilityBatch, fetchSpApiPricingAndFees } from "./sp-api.ts";
import { analyzeProducts } from "./llm.ts";
import type { EnrichedProduct, ProductRecord, SpApiData } from "./types.ts";
import type {
EnrichedProduct,
KeepaUpcLookupDetail,
ProductRecord,
SpApiData,
} from "./types.ts";
// The two kinds of analysis run this module distinguishes; the
// "/api/runs/:processType/:runId" route casts its `processType` path
// parameter to this union.
type ProcessType = "lead_analysis" | "category_analysis";
@@ -46,6 +56,7 @@ const DB_PATH = process.env.RESULTS_DB_PATH || "./results.db";
// Pagination bounds. NOTE(review): their consumers are outside this chunk —
// presumably the list endpoints; confirm before changing.
const DEFAULT_PAGE_SIZE = 25;
const MAX_PAGE_SIZE = 200;
// Exactly ten characters, each an uppercase ASCII letter or a digit; tested by
// isValidAsin.
const ASIN_PATTERN = /^[A-Z0-9]{10}$/;
// Upper bound on the number of UPCs a single request may carry. It is enforced
// by validateUpcRequest, which runs after the request parsers have de-duplicated
// the input.
const MAX_UPCS_PER_REQUEST = 1000;
// NOTE(review): initDb runs before getDb on the same path; presumably it
// prepares the database that getDb then opens — confirm in ./database.ts.
initDb(DB_PATH);
const db = getDb(DB_PATH);
@@ -82,6 +93,188 @@ function isValidAsin(value: string): boolean {
return ASIN_PATTERN.test(value);
}
/**
 * Breaks a free-form string into individual UPC tokens.
 *
 * Any run of whitespace, commas, semicolons, or pipes counts as a single
 * separator, so `"123, 456;789"` yields three tokens. Empty fragments (for
 * example from leading or trailing separators) are discarded.
 *
 * @param input - Raw text containing zero or more delimited values.
 * @returns The non-empty tokens in their original order; duplicates are kept.
 */
function splitRawUpcValues(input: string): string[] {
  const tokens: string[] = [];
  for (const fragment of input.split(/[\s,;|]+/)) {
    const token = fragment.trim();
    if (token) {
      tokens.push(token);
    }
  }
  return tokens;
}
/**
 * Recursively gathers UPC candidates from a loosely typed value and appends
 * them to `target`.
 *
 * - Strings are tokenised with `splitRawUpcValues`.
 * - Finite numbers are truncated toward zero and converted to a string.
 * - Arrays are walked element by element, including nested arrays.
 * - Every other value (null, booleans, plain objects, NaN, ±Infinity) is
 *   ignored.
 *
 * @param value - Untrusted input, e.g. a query-string value or parsed JSON.
 * @param target - Accumulator that receives the tokens; mutated in place.
 */
function collectUpcsFromUnknown(value: unknown, target: string[]): void {
  if (typeof value === "string") {
    // Append one token at a time instead of `push(...tokens)`: spreading a
    // very large array into call arguments can throw a RangeError, and this
    // string comes straight from the request.
    for (const token of splitRawUpcValues(value)) {
      target.push(token);
    }
    return;
  }

  if (typeof value === "number" && Number.isFinite(value)) {
    target.push(String(Math.trunc(value)));
    return;
  }

  if (Array.isArray(value)) {
    for (const item of value) {
      collectUpcsFromUnknown(item, target);
    }
  }
}
/**
 * Trims every value, drops the ones that end up empty, and removes duplicates.
 *
 * @param values - Candidate UPC strings, possibly padded or repeated.
 * @returns A new array of unique trimmed values, ordered by first occurrence.
 */
function normalizeAndDedupeUpcs(values: string[]): string[] {
  // A Set iterates in insertion order, so first occurrences keep their place.
  const unique = new Set<string>();
  for (const raw of values) {
    const trimmed = raw.trim();
    if (trimmed.length > 0) {
      unique.add(trimmed);
    }
  }
  return Array.from(unique);
}
/**
 * Extracts UPCs from a query string.
 *
 * Both `upc` and `upcs` may appear any number of times, and each occurrence
 * may itself hold several delimited values (`?upc=1&upcs=2,3&upcs=4`). All
 * `upc` values are read first, then all `upcs` values.
 *
 * @param params - Query parameters of the incoming request.
 * @returns Unique, trimmed UPC strings in order of first appearance.
 */
function parseUpcsFromSearchParams(params: URLSearchParams): string[] {
  const parsed: string[] = [];
  // `getAll` for both keys: `get` returns only the first occurrence, which
  // would silently drop any repeated `upcs=` parameter.
  for (const key of ["upc", "upcs"]) {
    for (const value of params.getAll(key)) {
      collectUpcsFromUnknown(value, parsed);
    }
  }
  return normalizeAndDedupeUpcs(parsed);
}
/**
 * Reads the UPC list carried by a request.
 *
 * - `GET`: values come from the query string (see `parseUpcsFromSearchParams`).
 * - `POST`: the JSON body is either an object with an `upcs` property or the
 *   UPC payload itself (string, number, or array of those).
 *
 * @param req - Incoming HTTP request.
 * @returns Unique, trimmed UPC strings; may be empty.
 * @throws Error with message "Method not allowed" for any other HTTP method.
 * @throws Error with message "Invalid JSON body" when a POST body cannot be
 *   parsed as JSON.
 */
async function parseUpcsFromRequest(req: Request): Promise<string[]> {
  switch (req.method) {
    case "GET":
      return parseUpcsFromSearchParams(new URL(req.url).searchParams);
    case "POST":
      break;
    default:
      throw new Error("Method not allowed");
  }

  let payload: unknown;
  try {
    payload = await req.json();
  } catch {
    throw new Error("Invalid JSON body");
  }

  // Prefer the `upcs` property when the body is an object that has one;
  // otherwise treat the whole body as the UPC payload.
  const hasUpcsKey =
    typeof payload === "object" && payload !== null && "upcs" in payload;
  const source = hasUpcsKey ? (payload as { upcs?: unknown }).upcs : payload;

  const collected: string[] = [];
  collectUpcsFromUnknown(source, collected);
  return normalizeAndDedupeUpcs(collected);
}
/**
 * Checks that a parsed UPC list is non-empty and within the per-request cap.
 *
 * @param upcs - UPCs extracted from the request.
 * @returns A human-readable error message, or `null` when the list is valid.
 */
function validateUpcRequest(upcs: string[]): string | null {
  const count = upcs.length;
  if (count === 0) {
    return "Provide at least one UPC via query (?upc=...) or JSON body { upcs: [...] }";
  }
  return count > MAX_UPCS_PER_REQUEST
    ? `Too many UPCs. Maximum allowed per request is ${MAX_UPCS_PER_REQUEST}.`
    : null;
}
/**
 * Tallies lookup results by their `status` field.
 *
 * @param details - Lookup results to summarise.
 * @returns An object mapping each status seen to its number of occurrences.
 */
function summarizeLookupStatuses(
  details: KeepaUpcLookupDetail[],
): Record<string, number> {
  return details.reduce<Record<string, number>>((tally, { status }) => {
    tally[status] = (tally[status] ?? 0) + 1;
    return tally;
  }, {});
}
/**
 * Validates an optional positive-integer field taken from untrusted input.
 *
 * Accepts either a number or a string consisting solely of decimal digits
 * (surrounding whitespace and a leading `+` are tolerated). Strings are
 * checked strictly: values such as `"12abc"`, `"1.5"`, or `"1e3"` are
 * rejected rather than being partially parsed, which keeps the string path
 * consistent with the number path (where `1.5` is rejected too).
 *
 * @param value - Raw field value; `null` and `undefined` mean "not provided".
 * @param fieldName - Field name used as the prefix of the error message.
 * @returns The integer (>= 1), or `undefined` when the field is absent.
 * @throws Error `"<fieldName> must be a positive integer"` for any other input.
 */
function parsePositiveIntField(
  value: unknown,
  fieldName: string,
): number | undefined {
  if (value == null) return undefined;

  let parsed: number | undefined;
  if (typeof value === "number") {
    parsed = value;
  } else if (typeof value === "string") {
    const text = value.trim();
    // Whole-string digit match: `Number.parseInt` would stop at the first
    // non-digit and accept the numeric prefix of malformed input.
    if (/^\+?\d+$/.test(text)) {
      parsed = Number(text);
    }
  }

  // `Number.isInteger` also rejects NaN and Infinity (e.g. from an absurdly
  // long digit string).
  if (parsed === undefined || !Number.isInteger(parsed) || parsed < 1) {
    throw new Error(`${fieldName} must be a positive integer`);
  }
  return parsed;
}
/**
 * Validated payload of a `/api/process/upc-file` request, as produced by
 * `parseUpcFileProcessRequest` and forwarded field by field to
 * `runUpcFileAnalysis`.
 */
type UpcFileProcessRequest = {
/** Trimmed, non-empty `inputFile` string from the request body. */
inputFile: string;
/** Trimmed `outputFile` string when one was supplied; otherwise undefined. */
outputFile?: string;
// The three numeric options below are positive integers when present. Their
// meaning is defined by runUpcFileAnalysis, which is not visible here.
inputBatchSize?: number;
upcLookupBatchSize?: number;
maxRows?: number;
};
/**
 * Parses and validates the JSON body of a UPC-file processing request.
 *
 * Checks run in a fixed order and the first failure wins: HTTP method, JSON
 * syntax, body shape, `inputFile`, `outputFile`, then the numeric options
 * (`inputBatchSize`, `upcLookupBatchSize`, `maxRows`).
 *
 * @param req - Incoming HTTP request; must be a POST with a JSON object body.
 * @returns The validated request with string fields trimmed.
 * @throws Error "Method not allowed" when the method is not POST.
 * @throws Error "Invalid JSON body" when the body is not parseable JSON.
 * @throws Error describing the offending field for any other validation
 *   failure.
 */
async function parseUpcFileProcessRequest(
  req: Request,
): Promise<UpcFileProcessRequest> {
  if (req.method !== "POST") {
    throw new Error("Method not allowed");
  }

  let payload: unknown;
  try {
    payload = await req.json();
  } catch {
    throw new Error("Invalid JSON body");
  }

  if (typeof payload !== "object" || payload === null) {
    throw new Error("Request body must be an object");
  }

  const {
    inputFile: rawInputFile,
    outputFile: rawOutputFile,
    inputBatchSize,
    upcLookupBatchSize,
    maxRows,
  } = payload as Record<string, unknown>;

  // A non-string value is treated like an empty string: both are rejected.
  const inputPath = typeof rawInputFile === "string" ? rawInputFile.trim() : "";
  if (inputPath.length === 0) {
    throw new Error("inputFile is required and must be a non-empty string");
  }

  // outputFile is optional, but when present it must be a usable string.
  let outputPath: string | undefined;
  if (rawOutputFile != null) {
    const candidate =
      typeof rawOutputFile === "string" ? rawOutputFile.trim() : "";
    if (candidate.length === 0) {
      throw new Error("outputFile must be a non-empty string when provided");
    }
    outputPath = candidate;
  }

  return {
    inputFile: inputPath,
    outputFile: outputPath,
    inputBatchSize: parsePositiveIntField(inputBatchSize, "inputBatchSize"),
    upcLookupBatchSize: parsePositiveIntField(
      upcLookupBatchSize,
      "upcLookupBatchSize",
    ),
    maxRows: parsePositiveIntField(maxRows, "maxRows"),
  };
}
function parseSort(
sortParam: string | null,
allowed: Set<string>,
@@ -1074,6 +1267,97 @@ const server = Bun.serve({
const url = new URL(req.url);
return json(getProductList(url.searchParams));
},
// Resolves UPCs to ASINs via mapUpcsToAsins.
// Responds with { requested, matched, items: [{ upc, asin }] } on success,
// 405 for an unsupported method, 400 for a malformed or invalid request, and
// 500 when the mapping call itself fails.
"/api/upc/map": async (req) => {
  let requestedUpcs: string[];
  try {
    requestedUpcs = await parseUpcsFromRequest(req);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    return json(
      { error: reason },
      reason === "Method not allowed" ? 405 : 400,
    );
  }

  const problem = validateUpcRequest(requestedUpcs);
  if (problem) {
    return json({ error: problem }, 400);
  }

  try {
    const asinByUpc = await mapUpcsToAsins(requestedUpcs);
    const items = Array.from(asinByUpc.entries(), ([upc, asin]) => ({
      upc,
      asin,
    }));
    return json({
      requested: requestedUpcs.length,
      matched: items.length,
      items,
    });
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    return json({ error: reason }, 500);
  }
},
// Returns the per-UPC lookup details from lookupKeepaUpcs.
// Responds with { requested, statusCounts, items } on success, 405 for an
// unsupported method, 400 for a malformed or invalid request, and 500 when
// the lookup call itself fails.
"/api/upc/lookup": async (req) => {
  let requestedUpcs: string[];
  try {
    requestedUpcs = await parseUpcsFromRequest(req);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    return json(
      { error: reason },
      reason === "Method not allowed" ? 405 : 400,
    );
  }

  const problem = validateUpcRequest(requestedUpcs);
  if (problem) {
    return json({ error: problem }, 400);
  }

  try {
    const detailsByUpc = await lookupKeepaUpcs(requestedUpcs);
    const items = Array.from(detailsByUpc.values());
    const statusCounts = summarizeLookupStatuses(items);
    return json({
      requested: requestedUpcs.length,
      statusCounts,
      items,
    });
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    return json({ error: reason }, 500);
  }
},
// Runs the UPC-file analysis for the file named in the POST body and returns
// the resulting summary. Responds 405 for a non-POST method, 400 for any other
// request-parsing failure, and 500 when the analysis itself throws.
// NOTE(review): inputFile/outputFile come straight from the request body and
// are handed to runUpcFileAnalysis unchanged — confirm that callers are
// trusted or that the paths are restricted downstream.
"/api/process/upc-file": async (req) => {
  let options: UpcFileProcessRequest;
  try {
    options = await parseUpcFileProcessRequest(req);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    // Every parse failure other than a wrong method is a client error.
    const status = reason === "Method not allowed" ? 405 : 400;
    return json({ error: reason }, status);
  }

  const { inputFile, outputFile, inputBatchSize, upcLookupBatchSize, maxRows } =
    options;

  try {
    const summary = await runUpcFileAnalysis({
      inputFile,
      outputFile,
      inputBatchSize,
      upcLookupBatchSize,
      maxRows,
      dbPath: DB_PATH,
      manageResources: false,
    });
    return json(summary);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    return json({ error: reason }, 500);
  }
},
"/api/runs/:processType/:runId": (req) => {
const processType = req.params.processType as ProcessType;
const runId = Number(req.params.runId);