From 95cebaa27cbaf8b587dbcc733b8120876d317b56 Mon Sep 17 00:00:00 2001 From: Victor Noguera Date: Thu, 21 May 2026 19:57:46 -0400 Subject: [PATCH] feat: add support for Claude LLM integration across multiple modules - Introduced `useClaude` option in `AnalysisPipelineOptions` to toggle Claude LLM usage. - Updated `processProductChunk` and `analyzeProducts` functions to accept and handle `useClaude` parameter. - Modified argument parsing in various scripts (`bestsellers-by-category`, `mid-range-sellers-by-category`, `top-monthly-sold-by-category`, etc.) to include `--claude` flag. - Enhanced `analyzeProductsInternal` to differentiate between LLM providers and handle requests to Claude API. - Added error handling for Claude API responses and ensured proper configuration for using Claude. - Updated documentation and usage messages to reflect the new `--claude` flag. --- .env.example | 11 +- README.md | 53 ++++-- src/analysis-pipeline.ts | 4 + src/bestsellers-by-category.ts | 15 +- src/config.ts | 16 +- src/index.ts | 14 +- src/llm.ts | 126 ++++++++++++++- src/mid-range-sellers-by-category.ts | 13 +- src/server.ts | 60 +++++-- src/stalker-analyze.ts | 9 +- src/stalker.ts | 231 +++++++++++++++++---------- src/top-monthly-sold-by-category.ts | 15 +- 12 files changed, 423 insertions(+), 144 deletions(-) diff --git a/.env.example b/.env.example index 9798ca8..246c10e 100644 --- a/.env.example +++ b/.env.example @@ -12,7 +12,10 @@ AWS_SECRET_ACCESS_KEY=your_aws_secret_access_key REDIS_URL=redis://localhost:6379 LLM_URL=http://localhost:1234/v1 LLM_MODEL=default -CACHE_TTL=86400 -GOOGLE_API_KEY=your_google_api_key -GOOGLE_CSE_ID=your_google_programmable_search_engine_id -SERPAPI_API_KEY=your_serpapi_api_key_for_google_shopping +ANTHROPIC_API_KEY=your_anthropic_api_key +ANTHROPIC_MODEL=claude-3-5-sonnet-20241022 +CACHE_TTL=86400 +GOOGLE_API_KEY=your_google_api_key +GOOGLE_CSE_ID=your_google_programmable_search_engine_id +SERPAPI_API_KEY=your_serpapi_api_key_for_google_shopping + diff --git a/README.md b/README.md index 623a477..57b0d21 100644 --- a/README.md +++ b/README.md @@ -24,11 +24,14 @@ cp .env.example .env bun run src/index.ts input/ [--out output/results.xlsx] ``` +Add `--claude` to use Anthropic Claude instead of local LM Studio for LLM analysis. + Examples: ```bash bun run src/index.ts input/leads.xlsx bun run src/index.ts input/leads.csv --out output/results.xlsx +bun run src/index.ts input/leads.xlsx --claude ``` Large-file behavior: @@ -55,6 +58,14 @@ bun run monthly-sold bun run mid-range ``` +Use Claude for category LLM analysis: + +```bash +bun run bestsellers --claude +bun run monthly-sold --claude +bun run mid-range --claude +``` + Mid-range process: - Script: `bun run mid-range` @@ -128,6 +139,12 @@ curl -X POST "http://localhost:3000/api/upc/lookup" \ -d '{"upcs":["012345678901","098765432109"]}' ``` +Run the web server with Claude-backed LLM calls: + +```bash +bun run start:web -- --claude +``` + ## Large UPC File Analysis (XLS/XLSX) For supplier price lists that contain UPC/EAN values and unit cost, use the @@ -248,23 +265,25 @@ ASIN, Name, Brand, Category, Unit Cost, Current Price, Avg Price 90d, Sales Rank ## Environment variables -| Variable | Default | Description | -| ----------------------- | -------------------------- | ----------------------------------------------------------------------- | -| `KEEPA_API_KEY` | — | **Required.** Keepa API key | -| `SP_API_CLIENT_ID` | — | LWA app client id from Solution Provider Portal | -| `SP_API_CLIENT_SECRET` | — | LWA app client secret from Solution Provider Portal | -| `SP_API_REFRESH_TOKEN` | — | Refresh token from self-authorization | -| `SP_API_REGION` | `na` | SP-API endpoint region (`na`, `eu`, `fe`; `us` is accepted as `na`) | -| `SP_API_MARKETPLACE_ID` | `ATVPDKIKX0DER` | Marketplace id used for pricing and fee calls (default: US) | -| `SP_API_SELLER_ID` | — | Seller ID used for listing restrictions eligibility checks | -| `SP_API_USE_SANDBOX` | `false` | Enable SP-API sandbox mode (`true`/`false`) | -| `AWS_ACCESS_KEY_ID` | — | AWS credentials for SigV4 signing (required in most private app setups) | -| `AWS_SECRET_ACCESS_KEY` | — | AWS credentials for SigV4 signing | -| `AWS_SESSION_TOKEN` | — | Optional session token when using STS credentials | -| `REDIS_URL` | `redis://localhost:6379` | Redis connection URL | -| `LLM_URL` | `http://localhost:1234/v1` | LM Studio API base URL | -| `LLM_MODEL` | `default` | Model name to pass to LM Studio | -| `CACHE_TTL` | `86400` | Redis cache TTL in seconds | +| Variable | Default | Description | +| ----------------------- | ---------------------------- | ----------------------------------------------------------------------- | +| `KEEPA_API_KEY` | — | **Required.** Keepa API key | +| `SP_API_CLIENT_ID` | — | LWA app client id from Solution Provider Portal | +| `SP_API_CLIENT_SECRET` | — | LWA app client secret from Solution Provider Portal | +| `SP_API_REFRESH_TOKEN` | — | Refresh token from self-authorization | +| `SP_API_REGION` | `na` | SP-API endpoint region (`na`, `eu`, `fe`; `us` is accepted as `na`) | +| `SP_API_MARKETPLACE_ID` | `ATVPDKIKX0DER` | Marketplace id used for pricing and fee calls (default: US) | +| `SP_API_SELLER_ID` | — | Seller ID used for listing restrictions eligibility checks | +| `SP_API_USE_SANDBOX` | `false` | Enable SP-API sandbox mode (`true`/`false`) | +| `AWS_ACCESS_KEY_ID` | — | AWS credentials for SigV4 signing (required in most private app setups) | +| `AWS_SECRET_ACCESS_KEY` | — | AWS credentials for SigV4 signing | +| `AWS_SESSION_TOKEN` | — | Optional session token when using STS credentials | +| `REDIS_URL` | `redis://localhost:6379` | Redis connection URL | +| `LLM_URL` | `http://localhost:1234/v1` | LM Studio API base URL | +| `LLM_MODEL` | `default` | Model name to pass to LM Studio | +| `ANTHROPIC_API_KEY` | — | Required when running any LLM script with `--claude` | +| `ANTHROPIC_MODEL` | `claude-3-5-sonnet-20241022` | Claude model ID used with `--claude` | +| `CACHE_TTL` | `86400` | Redis cache TTL in seconds | ## Notes diff --git a/src/analysis-pipeline.ts b/src/analysis-pipeline.ts index 483aa7a..b78f5c9 100644 --- a/src/analysis-pipeline.ts +++ b/src/analysis-pipeline.ts @@ -22,6 +22,7 @@ export type AnalysisPipelineOptions = { llmBatchDelayMs?: number; llmRetryDelayMs?: number; sellability?: SellabilityFilter; + useClaude?: boolean; }; export function chunkArray(items: T[], chunkSize: number): T[][] { @@ -60,6 +61,7 @@ export async function processProductChunk( const llmBatchDelayMs = Math.max(0, options.llmBatchDelayMs ?? 5_000); const llmRetryDelayMs = Math.max(0, options.llmRetryDelayMs ?? 10_000); const sellabilityFilter = options.sellability ?? "available"; + const useClaude = options.useClaude === true; console.log(`\nChecking cache for ${products.length} products...`); const cached = new Map(); @@ -242,6 +244,7 @@ export async function processProductChunk( try { verdicts = await analyzeProducts(batch, { ignoreSellability: sellabilityFilter === "all", + useClaude, }); } catch { if (llmRetryDelayMs > 0) { @@ -250,6 +253,7 @@ export async function processProductChunk( try { verdicts = await analyzeProducts(batch, { ignoreSellability: sellabilityFilter === "all", + useClaude, }); } catch { verdicts = null; diff --git a/src/bestsellers-by-category.ts b/src/bestsellers-by-category.ts index 5a7b1d3..ed0721c 100644 --- a/src/bestsellers-by-category.ts +++ b/src/bestsellers-by-category.ts @@ -26,6 +26,7 @@ type ParsedArgs = { categoryLimit: number; perCategoryTop: number; blacklistFile: string; + useClaude: boolean; }; type CategoryRunSummary = { @@ -72,6 +73,7 @@ function log( function parseArgs(): ParsedArgs { const args = process.argv.slice(2); + const useClaude = hasFlag(args, "--claude"); const outputDir = readFlagValue(args, "--out-dir") ?? path.join(process.cwd(), "output"); const blacklistFile = @@ -100,9 +102,14 @@ function parseArgs(): ParsedArgs { categoryLimit, perCategoryTop, blacklistFile, + useClaude, }; } +function hasFlag(args: string[], flag: string): boolean { + return args.includes(flag); +} + function readFlagValue(args: string[], flag: string): string | undefined { const idx = args.indexOf(flag); if (idx === -1) return undefined; @@ -118,7 +125,7 @@ function printUsageAndExit(message: string): never { "error", [ "Usage:", - " bun run src/bestsellers-by-category.ts [--category-limit 32] [--per-category-top 100] [--out-dir output] [--blacklist-file category-blacklist.csv]", + " bun run src/bestsellers-by-category.ts [--category-limit 32] [--per-category-top 100] [--out-dir output] [--blacklist-file category-blacklist.csv] [--claude]", "", "Flow:", " 1) Discover categories and round-robin selection.", @@ -1011,6 +1018,7 @@ export async function processCategory( runId: number, category: CategoryInfo, perCategoryTop: number, + useClaude = false, ): Promise { log("info", `\nCategory ${category.label} (${category.id})`); @@ -1106,7 +1114,7 @@ export async function processCategory( let batchVerdicts: LlmVerdict[]; try { - batchVerdicts = await analyzeProducts(batch); + batchVerdicts = await analyzeProducts(batch, { useClaude }); } catch (err) { const message = err instanceof Error ? err.message : String(err); log("warn", ` LLM batch failed: ${message}`); @@ -1192,7 +1200,7 @@ export async function main(): Promise { mkdirSync(args.outputDir, { recursive: true }); const DB_PATH = - process.env.RESULTS_DB_PATH || path.join(process.cwd(), "db", "results.db"); + process.env.RESULTS_DB_PATH || path.join(process.cwd(), "db", "results.db"); initDb(DB_PATH); const db = getDb(DB_PATH); @@ -1249,6 +1257,7 @@ export async function main(): Promise { runId, category, args.perCategoryTop, + args.useClaude, ); totalInsertedAsins += categorySummary.results?.length ?? 0; diff --git a/src/config.ts b/src/config.ts index 08f3de6..02ea3c4 100644 --- a/src/config.ts +++ b/src/config.ts @@ -18,13 +18,15 @@ function optionalBoolean(key: string, fallback: boolean): boolean { export const config = { keepaApiKey: required("KEEPA_API_KEY"), redisUrl: optional("REDIS_URL", "redis://localhost:6379"), - llmUrl: optional("LLM_URL", "http://localhost:1234/v1"), - llmModel: optional("LLM_MODEL", "default"), - cacheTtl: parseInt(optional("CACHE_TTL", "86400"), 10), - searxngUrl: optional("SEARXNG_URL", "https://searxng.nvictor.me/"), - searxngTimeoutMs: parseInt(optional("SEARXNG_TIMEOUT_MS", "10000"), 10), - searxngMaxResults: parseInt(optional("SEARXNG_MAX_RESULTS", "10"), 10), - spApiClientId: Bun.env.SP_API_CLIENT_ID, + llmUrl: optional("LLM_URL", "http://localhost:1234/v1"), + llmModel: optional("LLM_MODEL", "default"), + anthropicApiKey: Bun.env.ANTHROPIC_API_KEY, + anthropicModel: Bun.env.ANTHROPIC_MODEL, + cacheTtl: parseInt(optional("CACHE_TTL", "86400"), 10), + searxngUrl: optional("SEARXNG_URL", "https://searxng.nvictor.me/"), + searxngTimeoutMs: parseInt(optional("SEARXNG_TIMEOUT_MS", "10000"), 10), + searxngMaxResults: parseInt(optional("SEARXNG_MAX_RESULTS", "10"), 10), + spApiClientId: Bun.env.SP_API_CLIENT_ID, spApiClientSecret: Bun.env.SP_API_CLIENT_SECRET, spApiRefreshToken: Bun.env.SP_API_REFRESH_TOKEN, spApiRegion: optional("SP_API_REGION", "na"), diff --git a/src/index.ts b/src/index.ts index f88f424..ec197cc 100644 --- a/src/index.ts +++ b/src/index.ts @@ -42,9 +42,11 @@ function parseArgs(): { inputFile: string; outputFile?: string; sellability: SellabilityFilter; + useClaude: boolean; } { const args = process.argv.slice(2); const outputFile = readFlagValue(args, "--out", "--output"); + const useClaude = args.includes("--claude"); const inputFile = readInputFileArg( args, "--out", @@ -55,12 +57,12 @@ function parseArgs(): { if (!inputFile) { console.error( - "Usage: bun run src/index.ts [--out results.xlsx|--output results.xlsx] [--sellability available|all]", + "Usage: bun run src/index.ts [--out results.xlsx|--output results.xlsx] [--sellability available|all] [--claude]", ); process.exit(1); } - return { inputFile, outputFile, sellability }; + return { inputFile, outputFile, sellability, useClaude }; } function readFlagValue(args: string[], ...flags: string[]): string | undefined { @@ -109,9 +111,10 @@ function resolveBaseOutputPath(inputFile: string, outputFile?: string): string { } async function main() { - const { inputFile, outputFile, sellability } = parseArgs(); + const { inputFile, outputFile, sellability, useClaude } = parseArgs(); console.log(`Sellability filter: ${sellability}`); + console.log(`LLM provider: ${useClaude ? "claude" : "local"}`); console.log("Connecting to Redis..."); await connectCache(); @@ -144,7 +147,10 @@ async function main() { console.log( `\n=== Input chunk ${chunkIndex + 1}/${productChunks.length} (${chunk.length} products) ===`, ); - const chunkResults = await processProductChunk(chunk, { sellability }); + const chunkResults = await processProductChunk(chunk, { + sellability, + useClaude, + }); allResults.push(...chunkResults); } diff --git a/src/llm.ts b/src/llm.ts index ba69dcd..aeee1e8 100644 --- a/src/llm.ts +++ b/src/llm.ts @@ -56,6 +56,17 @@ Keep each reasoning under 100 characters to stay within output limits and mentio type AnalyzeProductsOptions = { ignoreSellability?: boolean; + useClaude?: boolean; +}; + +type LlmProvider = "lm-studio" | "claude"; + +type LmStudioResponse = { + choices?: { message?: { content?: string } }[]; +}; + +type ClaudeResponse = { + content?: Array<{ type?: string; text?: string }>; }; function getSystemPrompt(options: AnalyzeProductsOptions): string { @@ -72,8 +83,7 @@ export async function analyzeProducts( try { return await analyzeProductsInternal(products, options); } catch (err) { - const msg = String(err); - if (products.length > 1 && msg.includes("Context size has been exceeded")) { + if (products.length > 1 && isContextOverflowError(err)) { console.warn( `LLM context exceeded for batch of ${products.length}, retrying one product at a time...`, ); @@ -113,7 +123,43 @@ async function analyzeProductsInternal( summarizeForLlm(p, options.ignoreSellability === true), ); const systemPrompt = getSystemPrompt(options); + const provider = options.useClaude ? "claude" : "lm-studio"; + const content = await requestLlmContent( + provider, + systemPrompt, + productSummaries, + ); + return parseVerdicts(content, products); +} + +function isContextOverflowError(err: unknown): boolean { + const msg = String(err).toLowerCase(); + return ( + msg.includes("context size has been exceeded") || + msg.includes("prompt is too long") || + msg.includes("too many tokens") || + msg.includes("maximum context") || + msg.includes("context length") || + msg.includes("max_tokens") + ); +} + +async function requestLlmContent( + provider: LlmProvider, + systemPrompt: string, + productSummaries: ReturnType[], +): Promise { + if (provider === "claude") { + return requestClaudeContent(systemPrompt, productSummaries); + } + return requestLmStudioContent(systemPrompt, productSummaries); +} + +async function requestLmStudioContent( + systemPrompt: string, + productSummaries: ReturnType[], +): Promise { const res = await fetch(`${config.llmUrl}/chat/completions`, { method: "POST", headers: { @@ -132,15 +178,79 @@ async function analyzeProductsInternal( }); if (!res.ok) { - throw new Error(`LLM API error ${res.status}: ${await res.text()}`); + throw new Error(`LLM API error ${res.status}: ${await readErrorBody(res)}`); } - const data = (await res.json()) as { - choices?: { message?: { content?: string } }[]; - }; - const content = data.choices?.[0]?.message?.content ?? ""; + const data = (await res.json()) as LmStudioResponse; + return data.choices?.[0]?.message?.content ?? ""; +} - return parseVerdicts(content, products); +async function requestClaudeContent( + systemPrompt: string, + productSummaries: ReturnType[], +): Promise { + if (!config.anthropicApiKey) { + throw new Error( + "Missing required env var for --claude mode: ANTHROPIC_API_KEY", + ); + } + + const res = await fetch("https://api.anthropic.com/v1/messages", { + method: "POST", + headers: { + "Content-Type": "application/json", + "x-api-key": config.anthropicApiKey, + "anthropic-version": "2023-06-01", + }, + body: JSON.stringify({ + model: config.anthropicModel, + system: systemPrompt, + messages: [ + { role: "user", content: JSON.stringify(productSummaries, null, 2) }, + ], + temperature: 0.3, + max_tokens: 2048, + }), + }); + + if (!res.ok) { + throw new Error( + `Claude API error ${res.status}: ${await readErrorBody(res)}`, + ); + } + + const data = (await res.json()) as ClaudeResponse; + if (!Array.isArray(data.content)) { + return ""; + } + + return data.content + .filter((block) => block?.type === "text" && typeof block.text === "string") + .map((block) => block.text ?? "") + .join("\n"); +} + +async function readErrorBody(response: Response): Promise { + const text = await response.text(); + if (!text.trim()) return "No response body"; + + try { + const parsed = JSON.parse(text) as { + error?: { message?: string; type?: string }; + }; + const type = parsed.error?.type?.trim(); + const message = parsed.error?.message?.trim(); + if (type && message) { + return `${type}: ${message}`; + } + if (message) { + return message; + } + } catch { + // Response was plain text. + } + + return text; } function summarizeForLlm(p: EnrichedProduct, ignoreSellability: boolean) { diff --git a/src/mid-range-sellers-by-category.ts b/src/mid-range-sellers-by-category.ts index 6c4fd94..154e01b 100644 --- a/src/mid-range-sellers-by-category.ts +++ b/src/mid-range-sellers-by-category.ts @@ -34,6 +34,7 @@ type ParsedArgs = { selectCategories: boolean; categoryIds: number[]; sellabilityGate: "strict" | "soft" | "off"; + useClaude: boolean; outputDir: string; categoryLimit: number; perCategoryTop: number; @@ -118,6 +119,7 @@ function parseArgs(): ParsedArgs { const args = process.argv.slice(2); const listCategories = hasFlag(args, "--list-categories"); const selectCategories = hasFlag(args, "--select-categories"); + const useClaude = hasFlag(args, "--claude"); const categoryIdsRaw = readFlagValue(args, "--category-ids"); const sellabilityGateRaw = readFlagValue(args, "--sellability-gate"); const outputDir = @@ -312,6 +314,7 @@ function parseArgs(): ParsedArgs { selectCategories, categoryIds, sellabilityGate, + useClaude, outputDir, categoryLimit, perCategoryTop, @@ -370,7 +373,7 @@ function printUsageAndExit(message: string): never { "error", [ "Usage:", - " bun run src/mid-range-sellers-by-category.ts [--category-limit 32] [--list-categories] [--select-categories] [--category-ids 281053,172282] [--sellability-gate soft] [--per-category-top 100] [--category-candidate-pool 500] [--candidate-batch-size 60] [--min-monthly-sold 100] [--max-monthly-sold 1000] [--min-price 15] [--max-price 200] [--min-seller-count 3] [--max-seller-count 20] [--min-amazon-buybox-share-pct 15] [--max-amazon-buybox-share-pct 85] [--max-asins-analyzed 250] [--max-keepa-products-fetched 500] [--out-dir output] [--blacklist-file category-blacklist.csv]", + " bun run src/mid-range-sellers-by-category.ts [--category-limit 32] [--list-categories] [--select-categories] [--category-ids 281053,172282] [--sellability-gate soft] [--per-category-top 100] [--category-candidate-pool 500] [--candidate-batch-size 60] [--min-monthly-sold 100] [--max-monthly-sold 1000] [--min-price 15] [--max-price 200] [--min-seller-count 3] [--max-seller-count 20] [--min-amazon-buybox-share-pct 15] [--max-amazon-buybox-share-pct 85] [--max-asins-analyzed 250] [--max-keepa-products-fetched 500] [--out-dir output] [--blacklist-file category-blacklist.csv] [--claude]", "", "Selection:", " --list-categories Discover and print runnable categories, then exit.", @@ -1482,6 +1485,7 @@ export async function processCategory( minAmazonBuyboxSharePct: number, maxAmazonBuyboxSharePct: number, sellabilityGate: "strict" | "soft" | "off", + useClaude = false, runtimeBudget?: RuntimeBudget, candidateBatchSize = DEFAULT_CANDIDATE_BATCH_SIZE, ): Promise { @@ -1739,7 +1743,7 @@ export async function processCategory( let batchVerdicts: LlmVerdict[]; try { - batchVerdicts = await analyzeProducts(batch); + batchVerdicts = await analyzeProducts(batch, { useClaude }); } catch (err) { const message = err instanceof Error ? err.message : String(err); log("warn", ` LLM batch failed: ${message}`); @@ -1920,8 +1924,8 @@ export async function main(): Promise { try { mkdirSync(args.outputDir, { recursive: true }); const DB_PATH = - process.env.RESULTS_DB_PATH || - path.join(process.cwd(), "db", "results.db"); + process.env.RESULTS_DB_PATH || + path.join(process.cwd(), "db", "results.db"); initDb(DB_PATH); const db = getDb(DB_PATH); @@ -2014,6 +2018,7 @@ export async function main(): Promise { args.minAmazonBuyboxSharePct, args.maxAmazonBuyboxSharePct, args.sellabilityGate, + args.useClaude, runtimeBudget, args.candidateBatchSize, ); diff --git a/src/server.ts b/src/server.ts index b177c69..6f275b0 100644 --- a/src/server.ts +++ b/src/server.ts @@ -103,6 +103,7 @@ const DEFAULT_PAGE_SIZE = 25; const MAX_PAGE_SIZE = 200; const ASIN_PATTERN = /^[A-Z0-9]{10}$/; const MAX_UPCS_PER_REQUEST = 1000; +const USE_CLAUDE = process.argv.includes("--claude"); initDb(DB_PATH); const db = getDb(DB_PATH); @@ -128,7 +129,8 @@ function xlsx(buffer: ArrayBuffer, filename: string): Response { return new Response(buffer, { status: 200, headers: { - "content-type": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "content-type": + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "content-disposition": `attachment; filename="${filename}"`, }, }); @@ -758,7 +760,10 @@ function parseStalkerSort(sortParam: string | null): string { return parsed .replaceAll("runId", "runId") .replaceAll("rating_count", "rating_count") - .replaceAll("persisted_inventory_asin_count", "persisted_inventory_asin_count") + .replaceAll( + "persisted_inventory_asin_count", + "persisted_inventory_asin_count", + ) .replaceAll("storefront_asin_total", "storefront_asin_total"); } @@ -955,7 +960,11 @@ function parseStalkerProductSort(sortParam: string | null): string { "confidence", "last_seen_at", ]); - return parseSort(sortParam, allowedSort, "monthly_sold DESC, last_seen_at DESC, asin ASC"); + return parseSort( + sortParam, + allowedSort, + "monthly_sold DESC, last_seen_at DESC, asin ASC", + ); } function getStalkerProducts(filters: URLSearchParams) { @@ -1036,7 +1045,9 @@ function getStalkerProducts(filters: URLSearchParams) { }; } -function getStalkerProductsForExport(filters: URLSearchParams): StalkerProductRecord[] { +function getStalkerProductsForExport( + filters: URLSearchParams, +): StalkerProductRecord[] { const { where, params } = parseStalkerProductFilters(filters); const orderBy = parseStalkerProductSort(filters.get("sort")); @@ -1100,7 +1111,12 @@ function exportStalkerProductsXlsx(filters: URLSearchParams): Response { Category: parseCategoryTreeForExport(row.category_tree), "Monthly Sold": row.monthly_sold ?? null, Sellers: row.seller_count ?? null, - "Amazon Seller": row.amazon_is_seller == null ? "" : row.amazon_is_seller === 1 ? "Yes" : "No", + "Amazon Seller": + row.amazon_is_seller == null + ? "" + : row.amazon_is_seller === 1 + ? "Yes" + : "No", "Sales Rank": row.sales_rank ?? null, "Current Price": row.current_price ?? null, "Avg 90d": row.avg_price_90d ?? null, @@ -1155,11 +1171,31 @@ function exportStalkerProductsXlsx(filters: URLSearchParams): Response { function purgeStalkerData() { const counts = { - inventory: (db.query("SELECT COUNT(*) AS count FROM stalker_seller_inventory").get() as { count: number }).count, - asinSellers: (db.query("SELECT COUNT(*) AS count FROM stalker_asin_sellers").get() as { count: number }).count, - sellers: (db.query("SELECT COUNT(*) AS count FROM stalker_sellers").get() as { count: number }).count, - scans: (db.query("SELECT COUNT(*) AS count FROM stalker_asin_scans").get() as { count: number }).count, - runs: (db.query("SELECT COUNT(*) AS count FROM stalker_runs").get() as { count: number }).count, + inventory: ( + db + .query("SELECT COUNT(*) AS count FROM stalker_seller_inventory") + .get() as { count: number } + ).count, + asinSellers: ( + db.query("SELECT COUNT(*) AS count FROM stalker_asin_sellers").get() as { + count: number; + } + ).count, + sellers: ( + db.query("SELECT COUNT(*) AS count FROM stalker_sellers").get() as { + count: number; + } + ).count, + scans: ( + db.query("SELECT COUNT(*) AS count FROM stalker_asin_scans").get() as { + count: number; + } + ).count, + runs: ( + db.query("SELECT COUNT(*) AS count FROM stalker_runs").get() as { + count: number; + } + ).count, }; db.transaction(() => { @@ -1683,7 +1719,9 @@ async function reanalyzeSingleAsin( fetchedAt: new Date().toISOString(), }; - const verdicts = await analyzeProducts([enriched]); + const verdicts = await analyzeProducts([enriched], { + useClaude: USE_CLAUDE, + }); const verdict = verdicts[0] ?? { asin, verdict: "SKIP" as const, diff --git a/src/stalker-analyze.ts b/src/stalker-analyze.ts index b70da5c..45b50dd 100644 --- a/src/stalker-analyze.ts +++ b/src/stalker-analyze.ts @@ -17,6 +17,7 @@ type Args = { stalkerRunId: number; analysisRunId: number; asins: string[]; + useClaude: boolean; }; type InventoryRow = { @@ -45,6 +46,7 @@ function parseArgs(argv = process.argv.slice(2)): Args { const dbPath = readFlagValue(argv, "--db"); const stalkerRunId = Number(readFlagValue(argv, "--stalker-run-id")); const analysisRunId = Number(readFlagValue(argv, "--analysis-run-id")); + const useClaude = argv.includes("--claude"); const asins = (readFlagValue(argv, "--asins") ?? "") .split(",") .map((asin) => asin.trim().toUpperCase()) @@ -59,7 +61,7 @@ function parseArgs(argv = process.argv.slice(2)): Args { } if (asins.length === 0) throw new Error("Missing --asins"); - return { dbPath, stalkerRunId, analysisRunId, asins }; + return { dbPath, stalkerRunId, analysisRunId, asins, useClaude }; } function wait(ms: number): Promise { @@ -299,6 +301,7 @@ function refreshAnalysisRun(database: Database, runId: number): void { async function analyzeInBatches( products: EnrichedProduct[], + useClaude: boolean, ): Promise { const results: AnalysisResult[] = []; @@ -316,7 +319,7 @@ async function analyzeInBatches( let verdicts; try { - verdicts = await analyzeProducts(batch); + verdicts = await analyzeProducts(batch, { useClaude }); } catch (error) { console.warn( `Stalker analysis: LLM batch ${batchNumber} failed: ${ @@ -358,7 +361,7 @@ async function main(): Promise { console.log(`Stalker analysis: analyzing ${rows.length} sellable ASIN(s).`); const enriched = await buildEnrichedProducts(rows); - const results = await analyzeInBatches(enriched); + const results = await analyzeInBatches(enriched, args.useClaude); insertProductAnalysisResults(database, args.analysisRunId, results); refreshAnalysisRun(database, args.analysisRunId); } finally { diff --git a/src/stalker.ts b/src/stalker.ts index 7a30589..ac3abc8 100644 --- a/src/stalker.ts +++ b/src/stalker.ts @@ -41,6 +41,7 @@ export type StalkerArgs = { maxSellerRequests: number | null; sellability: boolean; analyzeSellable: boolean; + useClaude: boolean; }; export type StalkerOffer = { @@ -143,8 +144,12 @@ export function parseArgs(argv = process.argv.slice(2)): StalkerArgs { const storefrontUpdateHours = storefrontUpdateRaw ? Number(storefrontUpdateRaw) : DEFAULT_STOREFRONT_UPDATE_HOURS; - const offerLimit = offerLimitRaw ? Number(offerLimitRaw) : DEFAULT_OFFER_LIMIT; - const sellerLimit = sellerLimitRaw ? Number(sellerLimitRaw) : DEFAULT_SELLER_LIMIT; + const offerLimit = offerLimitRaw + ? Number(offerLimitRaw) + : DEFAULT_OFFER_LIMIT; + const sellerLimit = sellerLimitRaw + ? Number(sellerLimitRaw) + : DEFAULT_SELLER_LIMIT; const inventoryLimit = inventoryLimitRaw ? Number(inventoryLimitRaw) : DEFAULT_INVENTORY_LIMIT; @@ -159,6 +164,7 @@ export function parseArgs(argv = process.argv.slice(2)): StalkerArgs { const resume = !hasFlag(argv, "--no-resume"); const sellability = hasFlag(argv, "--sellability"); const analyzeSellable = hasFlag(argv, "--analyze-sellable"); + const useClaude = hasFlag(argv, "--claude"); if (analyzeSellable && !sellability) { printUsageAndExit("--analyze-sellable requires --sellability."); @@ -168,10 +174,7 @@ export function parseArgs(argv = process.argv.slice(2)): StalkerArgs { printUsageAndExit("--max-asins must be a positive integer."); } - if ( - !Number.isInteger(storefrontUpdateHours) || - storefrontUpdateHours < 0 - ) { + if (!Number.isInteger(storefrontUpdateHours) || storefrontUpdateHours < 0) { printUsageAndExit( "--storefront-update-hours must be a non-negative integer.", ); @@ -215,6 +218,7 @@ export function parseArgs(argv = process.argv.slice(2)): StalkerArgs { maxSellerRequests, sellability, analyzeSellable, + useClaude, }; } @@ -232,9 +236,13 @@ export function readAsinsFromXlsx(filePath: string): string[] { if (rows.length === 0) throw new Error("File contains no data rows"); const headers = Object.keys(rows[0]!); - const asinColumn = headers.find((header) => normalizeHeader(header) === "asin"); + const asinColumn = headers.find( + (header) => normalizeHeader(header) === "asin", + ); if (!asinColumn) { - throw new Error(`No ASIN column found. Available columns: ${headers.join(", ")}`); + throw new Error( + `No ASIN column found. Available columns: ${headers.join(", ")}`, + ); } return extractAsinsFromRows(rows, asinColumn); @@ -287,7 +295,9 @@ export function extractLiveOfferSellerCandidates( offerPrice: extractOfferPrice(offer), condition: extractString(offer.condition ?? offer.conditionComment), isFba: extractBoolean(offer.isFBA ?? offer.isFba ?? offer.fba), - stock: extractNumber(offer.stock ?? offer.stockCount ?? offer.currentStock), + stock: extractNumber( + offer.stock ?? offer.stockCount ?? offer.currentStock, + ), rawOffer: offer, }); } @@ -305,7 +315,9 @@ export async function runStalker(args: StalkerArgs): Promise { initDb(args.dbPath); const database = getDb(args.dbPath); - const completedAsins = args.resume ? loadPreviouslyScannedAsins(database) : new Set(); + const completedAsins = args.resume + ? loadPreviouslyScannedAsins(database) + : new Set(); const resumeFilteredAsins = cappedAsins.filter( (asin) => !completedAsins.has(asin), ); @@ -341,24 +353,32 @@ export async function runStalker(args: StalkerArgs): Promise { try { if (args.dryRun) { - console.log("Stalker dry-run: product and seller metadata will be fetched, storefronts will not be fetched or persisted."); + console.log( + "Stalker dry-run: product and seller metadata will be fetched, storefronts will not be fetched or persisted.", + ); } if (stats.skippedAsins > 0) { - console.log(`Stalker resume: skipped ${stats.skippedAsins} previously scanned ASIN(s).`); + console.log( + `Stalker resume: skipped ${stats.skippedAsins} previously scanned ASIN(s).`, + ); } for (const asin of resumeFilteredAsins) { - console.log(`Stalker: scanning ${asin} (${stats.scannedAsins + 1}/${resumeFilteredAsins.length})`); + console.log( + `Stalker: scanning ${asin} (${stats.scannedAsins + 1}/${resumeFilteredAsins.length})`, + ); - const result = await scanAsin(asin, args, apiKey, context).catch((error) => ({ - asin, - title: null, - offerCount: 0, - candidateSellerCount: 0, - matchedSellers: [], - product: null, - error: error instanceof Error ? error.message : String(error), - })); + const result = await scanAsin(asin, args, apiKey, context).catch( + (error) => ({ + asin, + title: null, + offerCount: 0, + candidateSellerCount: 0, + matchedSellers: [], + product: null, + error: error instanceof Error ? error.message : String(error), + }), + ); if (args.sellability && !args.dryRun) { await enrichInventorySellability(result, stats); @@ -379,7 +399,13 @@ export async function runStalker(args: StalkerArgs): Promise { analysisRunId != null && sellableAsins.length > 0 ) { - await runSellableAnalysisChild(args.dbPath, runId, analysisRunId, sellableAsins); + await runSellableAnalysisChild( + args.dbPath, + runId, + analysisRunId, + sellableAsins, + args.useClaude, + ); } stats.scannedAsins += 1; stats.matchedSellers += result.matchedSellers.length; @@ -398,7 +424,9 @@ export async function runStalker(args: StalkerArgs): Promise { ); if (stats.stoppedEarly) { - console.log("Stalker: stopping early because max seller request budget was reached."); + console.log( + "Stalker: stopping early because max seller request budget was reached.", + ); break; } } @@ -423,12 +451,7 @@ export async function runStalker(args: StalkerArgs): Promise { } catch (error) { const message = error instanceof Error ? error.message : String(error); if (!args.dryRun && runId != null) { - finishStalkerRunWithError( - database, - runId, - stats, - message, - ); + finishStalkerRunWithError(database, runId, stats, message); } if (!args.dryRun && analysisRunId != null) { finishStalkerAnalysisRun(database, analysisRunId, "failed", message); @@ -549,12 +572,11 @@ async function enrichInventorySellability( } for (const item of items) { - item.sellability = - sellabilityMap.get(item.asin) ?? { - canSell: null, - sellabilityStatus: "unknown", - sellabilityReason: "Sellability check returned no result", - }; + item.sellability = sellabilityMap.get(item.asin) ?? { + canSell: null, + sellabilityStatus: "unknown", + sellabilityReason: "Sellability check returned no result", + }; } for (const seller of sellers) { @@ -571,14 +593,19 @@ async function enrichInventoryProductDetails( result: StalkerAsinResult, apiKey: string, ): Promise { - const items = result.matchedSellers.flatMap(({ seller }) => seller.storefrontItems); + const items = result.matchedSellers.flatMap( + ({ seller }) => seller.storefrontItems, + ); const uniqueAsins = Array.from(new Set(items.map((item) => item.asin))); if (uniqueAsins.length === 0) return; console.log( `Stalker inventory details: fetching Keepa product details for ${uniqueAsins.length} sellable ASIN(s)...`, ); - const detailsByAsin = await fetchKeepaInventoryProductDetails(apiKey, uniqueAsins); + const detailsByAsin = await fetchKeepaInventoryProductDetails( + apiKey, + uniqueAsins, + ); for (const item of items) { item.productDetails = detailsByAsin.get(item.asin) ?? null; @@ -761,7 +788,8 @@ function canSpendSellerRequests( ): boolean { if (args.maxSellerRequests == null) return true; const spent = - context.stats.sellerMetadataRequests + context.stats.sellerStorefrontRequests; + context.stats.sellerMetadataRequests + + context.stats.sellerStorefrontRequests; if (spent + nextRequests <= args.maxSellerRequests) return true; context.stats.stoppedEarly = true; return false; @@ -856,7 +884,8 @@ function upsertAsinScan( `SELECT id FROM stalker_asin_scans WHERE run_id = ? AND source_asin = ?`, ) .get(runId, result.asin) as { id: number } | null; - if (!row) throw new Error(`Failed to load stalker scan row for ${result.asin}`); + if (!row) + throw new Error(`Failed to load stalker scan row for ${result.asin}`); return row.id; } @@ -978,7 +1007,9 @@ function upsertSellerInventory( item.sellability?.sellabilityReason ?? null, item.productDetails?.title ?? null, item.productDetails?.brand ?? null, - item.productDetails ? JSON.stringify(item.productDetails.categoryTree) : null, + item.productDetails + ? JSON.stringify(item.productDetails.categoryTree) + : null, item.productDetails?.currentPrice ?? null, item.productDetails?.avgPrice90 ?? null, item.productDetails?.salesRank ?? null, @@ -989,7 +1020,9 @@ function upsertSellerInventory( : item.productDetails.amazonIsSeller ? 1 : 0, - item.productDetails ? JSON.stringify(item.productDetails.rawProduct) : null, + item.productDetails + ? JSON.stringify(item.productDetails.rawProduct) + : null, fetchedAt, JSON.stringify(item.rawInventory), ); @@ -1012,7 +1045,10 @@ function startStalkerRun( return result.lastInsertRowid as number; } -function startStalkerAnalysisRun(database: Database, inputFile: string): number { +function startStalkerAnalysisRun( + database: Database, + inputFile: string, +): number { const result = database .prepare( `INSERT INTO category_analysis_runs ( @@ -1231,18 +1267,24 @@ function normalizeSellerResponse( if (!sellers) return []; if (Array.isArray(sellers)) { return sellers - .map((seller) => [ - normalizeSellerId(seller.sellerId ?? seller.sellerID ?? seller.id), - seller, - ] as [string | null, Record]) + .map( + (seller) => + [ + normalizeSellerId(seller.sellerId ?? seller.sellerID ?? seller.id), + seller, + ] as [string | null, Record], + ) .filter((entry): entry is [string, Record] => !!entry[0]); } return Object.entries(sellers) - .map(([sellerId, seller]) => [ - normalizeSellerId(sellerId), - seller, - ] as [string | null, Record]) + .map( + ([sellerId, seller]) => + [normalizeSellerId(sellerId), seller] as [ + string | null, + Record, + ], + ) .filter((entry): entry is [string, Record] => !!entry[0]); } @@ -1253,14 +1295,15 @@ function parseSeller( ): StalkerSeller { const allStorefrontItems = extractStorefrontItems(seller); const storefrontItems = - inventoryLimit === 0 - ? [] - : allStorefrontItems.slice(0, inventoryLimit); + inventoryLimit === 0 ? [] : allStorefrontItems.slice(0, inventoryLimit); const storefrontAsins = storefrontItems.map((item) => item.asin); return { sellerId, sellerName: extractString( - seller.sellerName ?? seller.name ?? seller.storeName ?? seller.businessName, + seller.sellerName ?? + seller.name ?? + seller.storeName ?? + seller.businessName, ), rating: extractNumber( seller.currentRating ?? seller.rating ?? seller.feedbackRating, @@ -1279,7 +1322,9 @@ function parseSeller( }; } -function extractStorefrontItems(seller: Record): StalkerInventoryItem[] { +function extractStorefrontItems( + seller: Record, +): StalkerInventoryItem[] { const candidates = [ seller.asinList, seller.asins, @@ -1311,7 +1356,12 @@ function collectStorefrontItems( const asin = normalizeAsin((value as Record).asin); if (asin && !seen.has(asin)) { seen.add(asin); - items.push({ asin, rawInventory: value, sellability: null, productDetails: null }); + items.push({ + asin, + rawInventory: value, + sellability: null, + productDetails: null, + }); } return; } @@ -1319,7 +1369,12 @@ function collectStorefrontItems( const asin = normalizeAsin(value); if (!asin || seen.has(asin)) return; seen.add(asin); - items.push({ asin, rawInventory: { asin }, sellability: null, productDetails: null }); + items.push({ + asin, + rawInventory: { asin }, + sellability: null, + productDetails: null, + }); } function parseInventoryProductDetails( @@ -1331,9 +1386,9 @@ function parseInventoryProductDetails( title: extractString(product.title), brand: extractString(product.brand ?? product.manufacturer), categoryTree: - product.categoryTree?.map((category: { name?: unknown }) => - extractString(category.name), - ).filter((name: string | null): name is string => !!name) ?? [], + product.categoryTree + ?.map((category: { name?: unknown }) => extractString(category.name)) + .filter((name: string | null): name is string => !!name) ?? [], currentPrice: extractCurrentPrice(csv), avgPrice90: stats?.avg?.[0] != null ? stats.avg[0] / 100 : null, salesRank: extractNumber(stats?.current?.[3]), @@ -1371,10 +1426,14 @@ function resolveAmazonIsSeller( stats: Record | undefined, csv: unknown, ): boolean | null { - if (typeof product.isAmazonSeller === "boolean") return product.isAmazonSeller; + if (typeof product.isAmazonSeller === "boolean") + return product.isAmazonSeller; if (typeof product.availabilityAmazon === "number") { if (product.availabilityAmazon >= 0) return true; - if (product.availabilityAmazon === -1 || product.availabilityAmazon === -2) { + if ( + product.availabilityAmazon === -1 || + product.availabilityAmazon === -2 + ) { return false; } } @@ -1437,21 +1496,27 @@ async function runSellableAnalysisChild( stalkerRunId: number, analysisRunId: number, asins: string[], + useClaude: boolean, ): Promise { + const cmd = [ + "bun", + "run", + "src/stalker-analyze.ts", + "--db", + dbPath, + "--stalker-run-id", + String(stalkerRunId), + "--analysis-run-id", + String(analysisRunId), + "--asins", + asins.join(","), + ]; + if (useClaude) { + cmd.push("--claude"); + } + const child = Bun.spawn({ - cmd: [ - "bun", - "run", - "src/stalker-analyze.ts", - "--db", - dbPath, - "--stalker-run-id", - String(stalkerRunId), - "--analysis-run-id", - String(analysisRunId), - "--asins", - asins.join(","), - ], + cmd, stdout: "inherit", stderr: "inherit", }); @@ -1493,7 +1558,8 @@ function extractNumber(value: unknown): number | null { function extractBoolean(value: unknown): boolean | null { if (typeof value === "boolean") return value; - if (typeof value === "number") return value === 1 ? true : value === 0 ? false : null; + if (typeof value === "number") + return value === 1 ? true : value === 0 ? false : null; if (typeof value !== "string") return null; const normalized = value.trim().toLowerCase(); if (["1", "true", "yes"].includes(normalized)) return true; @@ -1502,7 +1568,10 @@ function extractBoolean(value: unknown): boolean | null { } function normalizeHeader(value: string): string { - return value.toLowerCase().trim().replace(/[^a-z0-9]/g, ""); + return value + .toLowerCase() + .trim() + .replace(/[^a-z0-9]/g, ""); } function readFlagValue(args: string[], flag: string): string | undefined { @@ -1518,7 +1587,7 @@ function hasFlag(args: string[], flag: string): boolean { function printUsageAndExit(message: string): never { console.error(message); console.error( - "Usage: bun run stalker --input input/asins.xlsx [--db db/results.db] [--max-asins N] [--offer-limit 100] [--seller-limit 30] [--inventory-limit 200] [--storefront-update-hours 168] [--seller-cache-hours 168] [--max-seller-requests N] [--sellability] [--analyze-sellable] [--include-stock] [--dry-run] [--no-resume]", + "Usage: bun run stalker --input input/asins.xlsx [--db db/results.db] [--max-asins N] [--offer-limit 100] [--seller-limit 30] [--inventory-limit 200] [--storefront-update-hours 168] [--seller-cache-hours 168] [--max-seller-requests N] [--sellability] [--analyze-sellable] [--include-stock] [--dry-run] [--no-resume] [--claude]", ); process.exit(1); } @@ -1562,7 +1631,9 @@ function computeWaitMsFromRefill(refillIn?: number): number { ); } - return Math.ceil((1 / Math.max(1, refillRate)) * 60_000) + KEEP_RETRY_BUFFER_MS; + return ( + Math.ceil((1 / Math.max(1, refillRate)) * 60_000) + KEEP_RETRY_BUFFER_MS + ); } function parseErrorPayload(text: string): KeepaApiResponse | null { diff --git a/src/top-monthly-sold-by-category.ts b/src/top-monthly-sold-by-category.ts index 0645119..547aa5c 100644 --- a/src/top-monthly-sold-by-category.ts +++ b/src/top-monthly-sold-by-category.ts @@ -28,6 +28,7 @@ type ParsedArgs = { categoryCandidatePool: number; minMonthlySold: number; blacklistFile: string; + useClaude: boolean; }; type CategoryRunSummary = { @@ -76,6 +77,7 @@ function log( function parseArgs(): ParsedArgs { const args = process.argv.slice(2); + const useClaude = hasFlag(args, "--claude"); const outputDir = readFlagValue(args, "--out-dir") ?? path.join(process.cwd(), "output"); const blacklistFile = @@ -131,9 +133,14 @@ function parseArgs(): ParsedArgs { categoryCandidatePool, minMonthlySold, blacklistFile, + useClaude, }; } +function hasFlag(args: string[], flag: string): boolean { + return args.includes(flag); +} + function readFlagValue(args: string[], flag: string): string | undefined { const idx = args.indexOf(flag); if (idx === -1) return undefined; @@ -149,7 +156,7 @@ function printUsageAndExit(message: string): never { "error", [ "Usage:", - " bun run src/top-monthly-sold-by-category.ts [--category-limit 32] [--per-category-top 100] [--category-candidate-pool 500] [--min-monthly-sold 300] [--out-dir output] [--blacklist-file category-blacklist.csv]", + " bun run src/top-monthly-sold-by-category.ts [--category-limit 32] [--per-category-top 100] [--category-candidate-pool 500] [--min-monthly-sold 300] [--out-dir output] [--blacklist-file category-blacklist.csv] [--claude]", "", "Flow:", " 1) Discover categories and round-robin selection.", @@ -1066,6 +1073,7 @@ export async function processCategory( perCategoryTop: number, categoryCandidatePool: number, minMonthlySold: number, + useClaude = false, ): Promise { log("info", `\nCategory ${category.label} (${category.id})`); @@ -1200,7 +1208,7 @@ export async function processCategory( let batchVerdicts: LlmVerdict[]; try { - batchVerdicts = await analyzeProducts(batch); + batchVerdicts = await analyzeProducts(batch, { useClaude }); } catch (err) { const message = err instanceof Error ? err.message : String(err); log("warn", ` LLM batch failed: ${message}`); @@ -1286,7 +1294,7 @@ export async function main(): Promise { mkdirSync(args.outputDir, { recursive: true }); const DB_PATH = - process.env.RESULTS_DB_PATH || path.join(process.cwd(), "db", "results.db"); + process.env.RESULTS_DB_PATH || path.join(process.cwd(), "db", "results.db"); initDb(DB_PATH); const db = getDb(DB_PATH); @@ -1348,6 +1356,7 @@ export async function main(): Promise { args.perCategoryTop, args.categoryCandidatePool, args.minMonthlySold, + args.useClaude, ); totalInsertedAsins += categorySummary.results?.length ?? 0;