/** Options collected from the command line for a single offer search. */
type CliArgs = {
  query: string;
  json: boolean;
  provider?: "serpapi" | "google-custom-search" | "searxng";
  categories?: string;
  engines?: string;
  limit?: number;
};

/** Flags that take a value, written either as `--flag value` or `--flag=value`. */
const VALUE_FLAGS: readonly string[] = [
  "--limit",
  "--category",
  "--engine",
  "--provider",
];

/**
 * Returns the value supplied for `flag`, or `undefined` when there is none.
 *
 * The `--flag=value` form wins over the `--flag value` form. In the spaced
 * form a following token that starts with `--` is treated as the next flag,
 * not as this flag's value.
 */
function readFlagValue(args: string[], flag: string): string | undefined {
  const inlinePrefix = `${flag}=`;
  const inlineArg = args.find((arg) => arg.startsWith(inlinePrefix));
  if (inlineArg !== undefined) return inlineArg.slice(inlinePrefix.length);

  const flagIndex = args.indexOf(flag);
  if (flagIndex === -1) return undefined;

  const candidate = args[flagIndex + 1];
  return candidate === undefined || candidate.startsWith("--")
    ? undefined
    : candidate;
}

/**
 * Turns raw CLI arguments into {@link CliArgs}.
 *
 * Every token that is neither a `--flag` nor the value of a value flag becomes
 * part of the query. Prints a message and exits with status 1 when the query is
 * empty, a value flag has no value, or `--limit` is not a positive integer.
 */
function parseArgs(args: string[]): CliArgs {
  // Reject `--engine --json` style input instead of silently using the next
  // flag as the value.
  for (const flag of VALUE_FLAGS) {
    if (args.includes(flag) && readFlagValue(args, flag) === undefined) {
      console.error(`${flag} requires a value.`);
      process.exit(1);
    }
  }

  const json = args.includes("--json");
  const shopping = args.includes("--shopping");
  const providerRaw = readFlagValue(args, "--provider");
  const engineRaw = readFlagValue(args, "--engine");
  const categoryRaw = readFlagValue(args, "--category");
  const limitRaw = readFlagValue(args, "--limit");
  const limit = limitRaw == null ? undefined : Number(limitRaw);
  // An explicit --category overrides the --shopping shorthand.
  const categories = categoryRaw ?? (shopping ? "shopping" : undefined);
  const provider = normalizeProvider(providerRaw);

  const queryParts = args.filter((arg, index) => {
    if (arg.startsWith("--")) return false;
    const previous = args[index - 1];
    return previous === undefined || !VALUE_FLAGS.includes(previous);
  });
  const query = queryParts.join(" ").trim();

  if (!query) {
    console.error(
      'Usage: bun run search-offers "product search terms" [--limit 10] [--provider serpapi|google-custom-search|searxng] [--json]',
    );
    process.exit(1);
  }

  if (
    limitRaw != null &&
    (limit == null || !Number.isInteger(limit) || limit <= 0)
  ) {
    console.error("--limit must be a positive integer.");
    process.exit(1);
  }

  return {
    query,
    json,
    provider,
    categories,
    engines: engineRaw,
    limit,
  };
}

/** Prints the results as a console table, or a short notice when there are none. */
function printTable(results: SearxngOfferSearchResult[]): void {
  if (results.length === 0) {
    console.log("No offer results found.");
    return;
  }

  const rows = results.map((result) => ({
    Rank: result.rank,
    Score: result.score,
    ASIN: result.matchedAsin ?? "",
    Price: formatPrice(result),
    "Price Label": result.detectedPriceLabel ?? "",
    Domain: result.domain,
    Title: result.title,
    URL: result.url,
  }));
  console.table(rows);
}

/** CLI entry point: parse arguments, run the search, print JSON or a table. */
async function main(): Promise<void> {
  const args = parseArgs(process.argv.slice(2));
  const results = await searchProductOffers(args.query, {
    maxResults: args.limit,
    provider: args.provider,
    categories: args.categories,
    engines: args.engines,
  });

  if (args.json) {
    console.log(JSON.stringify(results, null, 2));
    return;
  }

  printTable(results);
}

/**
 * Maps a user-supplied provider name to a supported provider id.
 *
 * Matching ignores case and surrounding whitespace; `google-shopping` is
 * accepted as an alias for `serpapi`. Prints a message and exits with status 1
 * for any other non-empty value.
 */
function normalizeProvider(
  value: string | undefined,
): "serpapi" | "google-custom-search" | "searxng" | undefined {
  if (value == null) return undefined;

  switch (value.trim().toLowerCase()) {
    case "serpapi":
    case "google-shopping":
      return "serpapi";
    case "google-custom-search":
      return "google-custom-search";
    case "searxng":
      return "searxng";
    default:
      console.error(
        "--provider must be one of: serpapi, google-custom-search, searxng",
      );
      process.exit(1);
  }
}

/**
 * Formats the detected price of a result for display.
 *
 * Returns an empty string when no price was detected, the detected text when
 * one is present, and otherwise the amount prefixed with `$` (USD, also the
 * fallback currency) or with the currency code.
 */
function formatPrice(result: SearxngOfferSearchResult): string {
  if (result.detectedPrice == null) return "";
  if (result.detectedPriceText) return result.detectedPriceText;

  const currency = result.detectedPriceCurrency ?? "USD";
  if (currency === "USD") return `$${result.detectedPrice}`;
  return `${currency} ${result.detectedPrice}`;
}
err.message : err}`); + process.exit(1); +}); diff --git a/src/config.ts b/src/config.ts index 7aa4692..08f3de6 100644 --- a/src/config.ts +++ b/src/config.ts @@ -18,10 +18,13 @@ function optionalBoolean(key: string, fallback: boolean): boolean { export const config = { keepaApiKey: required("KEEPA_API_KEY"), redisUrl: optional("REDIS_URL", "redis://localhost:6379"), - llmUrl: optional("LLM_URL", "http://localhost:1234/v1"), - llmModel: optional("LLM_MODEL", "default"), - cacheTtl: parseInt(optional("CACHE_TTL", "86400"), 10), - spApiClientId: Bun.env.SP_API_CLIENT_ID, + llmUrl: optional("LLM_URL", "http://localhost:1234/v1"), + llmModel: optional("LLM_MODEL", "default"), + cacheTtl: parseInt(optional("CACHE_TTL", "86400"), 10), + searxngUrl: optional("SEARXNG_URL", "https://searxng.nvictor.me/"), + searxngTimeoutMs: parseInt(optional("SEARXNG_TIMEOUT_MS", "10000"), 10), + searxngMaxResults: parseInt(optional("SEARXNG_MAX_RESULTS", "10"), 10), + spApiClientId: Bun.env.SP_API_CLIENT_ID, spApiClientSecret: Bun.env.SP_API_CLIENT_SECRET, spApiRefreshToken: Bun.env.SP_API_REFRESH_TOKEN, spApiRegion: optional("SP_API_REGION", "na"), diff --git a/src/searxng.test.ts b/src/searxng.test.ts new file mode 100644 index 0000000..6bd013f --- /dev/null +++ b/src/searxng.test.ts @@ -0,0 +1,350 @@ +import { afterAll, beforeEach, expect, mock, test } from "bun:test"; +import { normalizeAsin, searchProductOffers } from "./searxng.ts"; + +const originalFetch = globalThis.fetch; + +beforeEach(() => { + globalThis.fetch = originalFetch; +}); + +afterAll(() => { + globalThis.fetch = originalFetch; +}); + +test("normalizeAsin uppercases and validates ASINs", () => { + expect(normalizeAsin(" b07sn9bhvv ")).toBe("B07SN9BHVV"); + expect(() => normalizeAsin("not-an-asin")).toThrow("Invalid ASIN"); +}); + +test("searchProductOffers derives ASIN search behavior for ASIN-only queries", async () => { + const fetchMock = mock(async (input: string | URL | Request) => { + const url = input 
test("searchProductOffers falls back to HTML when JSON is unavailable", async () => {
  // NOTE(review): the fixture's tags were lost in this copy of the patch. The
  // markup below is rebuilt from the selectors parseHtmlResults listens on
  // (`article.result`, `h3 a`, `p.content`, `.engines span`) and from the
  // assertions at the end of this test; the wrapper element and the URL path
  // are guesses. Confirm against the original fixture.
  const html = `
    <main>
      <article class="result">
        <h3><a href="https://supplier.example/item">Supplier offer B07SN9BHVV</a></h3>
        <p class="content">Wholesale product sale price: USD 9.50 with ASIN B07SN9BHVV.</p>
        <div class="engines"><span>duckduckgo</span></div>
      </article>
    </main>
  `;
  const fetchMock = mock(async (input: string | URL | Request) => {
    const url = input instanceof URL ? input : new URL(String(input));
    // Refuse the JSON attempt so the client has to retry as HTML.
    if (url.searchParams.get("format") === "json") {
      return new Response("forbidden", { status: 403 });
    }
    return new Response(html, {
      status: 200,
      headers: { "content-type": "text/html" },
    });
  });

  const results = await searchProductOffers("B07SN9BHVV", {
    provider: "searxng",
    baseUrl: "https://searxng.test/",
    fetchImpl: fetchMock as unknown as typeof fetch,
  });

  expect(results).toHaveLength(1);
  expect(results[0]?.title).toBe("Supplier offer B07SN9BHVV");
  expect(results[0]?.domain).toBe("supplier.example");
  expect(results[0]?.detectedPrice).toBe(9.5);
  expect(results[0]?.detectedPriceLabel).toBe("sale price");
  expect(results[0]?.detectedPriceText).toBe("USD 9.50");
  expect(results[0]?.matchedAsin).toBe("B07SN9BHVV");
  expect(results[0]?.engines).toEqual(["duckduckgo"]);
  // One JSON attempt plus one HTML attempt.
  expect(fetchMock).toHaveBeenCalledTimes(2);
});
ASIN-only queries", async () => { + const fetchMock = mock(async () => + Response.json({ + results: [ + { + title: "Unrelated deal", + url: "https://deals.example/phones", + content: "This price is $449 but it is for another product.", + }, + { + title: "Amazon listing B07SN9BHVV", + url: "https://www.amazon.in/dp/B07SN9BHVV", + content: "1 offer from ₹550.00 · Buying options.", + }, + ], + }), + ); + + const results = await searchProductOffers("B07SN9BHVV", { + provider: "searxng", + baseUrl: "https://searxng.test/", + fetchImpl: fetchMock as unknown as typeof fetch, + }); + + expect(results).toHaveLength(1); + expect(results[0]?.matchedAsin).toBe("B07SN9BHVV"); + expect(results[0]?.detectedPrice).toBe(550); + expect(results[0]?.detectedPriceCurrency).toBe("INR"); + expect(results[0]?.detectedPriceText).toBe("₹550.00"); +}); + +test("searchProductOffers keeps arbitrary query strings generic", async () => { + const fetchMock = mock(async (input: string | URL | Request) => { + const url = input instanceof URL ? input : new URL(String(input)); + expect(url.searchParams.get("q")).toBe("romand dry mango tulip price"); + + return Response.json({ + results: [ + { + title: "Generic result", + url: "https://shop.example/romand", + content: "Sale price: $14.25", + }, + ], + }); + }); + + const results = await searchProductOffers("romand dry mango tulip price", { + provider: "searxng", + baseUrl: "https://searxng.test/", + fetchImpl: fetchMock as unknown as typeof fetch, + }); + + expect(results).toHaveLength(1); + expect(results[0]?.asin).toBeUndefined(); + expect(results[0]?.detectedPrice).toBe(14.25); +}); + +test("searchProductOffers sends configured categories", async () => { + const fetchMock = mock(async (input: string | URL | Request) => { + const url = input instanceof URL ? 
input : new URL(String(input)); + expect(url.searchParams.get("categories")).toBe("shopping"); + + return Response.json({ + results: [ + { + title: "Shopping result", + url: "https://shop.example/item", + content: "Offer price: $10.00", + }, + ], + }); + }); + + const results = await searchProductOffers("romand price", { + provider: "searxng", + baseUrl: "https://searxng.test/", + categories: "shopping", + fetchImpl: fetchMock as unknown as typeof fetch, + }); + + expect(results[0]?.detectedPrice).toBe(10); +}); + +test("searchProductOffers sends configured SearXNG engines", async () => { + const fetchMock = mock(async (input: string | URL | Request) => { + const url = input instanceof URL ? input : new URL(String(input)); + expect(url.searchParams.get("engines")).toBe("google"); + expect(url.searchParams.get("q")).toBe("!go romand price"); + + return Response.json({ + results: [ + { + title: "Google-backed result", + url: "https://shop.example/item", + content: "Offer price: $11.00", + engine: "google", + }, + ], + }); + }); + + const results = await searchProductOffers("romand price", { + provider: "searxng", + baseUrl: "https://searxng.test/", + engines: "google", + fetchImpl: fetchMock as unknown as typeof fetch, + }); + + expect(results[0]?.detectedPrice).toBe(11); + expect(results[0]?.engines).toEqual(["google"]); +}); + +test("searchProductOffers uses Google Custom Search API and pagemap offer prices", async () => { + const fetchMock = mock(async (input: string | URL | Request) => { + const url = input instanceof URL ? 
input : new URL(String(input)); + expect(url.hostname).toBe("googleapis.test"); + expect(url.searchParams.get("key")).toBe("test-key"); + expect(url.searchParams.get("cx")).toBe("test-cx"); + expect(url.searchParams.get("num")).toBe("5"); + expect(url.searchParams.get("q")).toBe("romand dry mango tulip"); + + return Response.json({ + items: [ + { + title: "Romand Dry Mango Tulip", + link: "https://store.example/romand", + snippet: "Buy from Store Example.", + pagemap: { + offer: [{ price: "12.50", pricecurrency: "USD" }], + }, + }, + ], + }); + }); + + const results = await searchProductOffers("romand dry mango tulip", { + provider: "google-custom-search", + baseUrl: "https://googleapis.test/customsearch/v1", + googleApiKey: "test-key", + googleCx: "test-cx", + maxResults: 5, + fetchImpl: fetchMock as unknown as typeof fetch, + }); + + expect(results).toHaveLength(1); + expect(results[0]?.title).toContain("Romand Dry Mango Tulip"); + expect(results[0]?.domain).toBe("store.example"); + expect(results[0]?.detectedPrice).toBe(12.5); + expect(results[0]?.detectedPriceLabel).toBe("offer price"); + expect(results[0]?.engines).toEqual(["google custom search"]); +}); + +test("searchProductOffers defaults to SerpApi Google Shopping results", async () => { + const fetchMock = mock(async (input: string | URL | Request) => { + const url = input instanceof URL ? 
input : new URL(String(input)); + expect(url.hostname).toBe("serpapi.test"); + expect(url.searchParams.get("engine")).toBe("google_shopping"); + expect(url.searchParams.get("q")).toBe("dry mango tulip price"); + expect(url.searchParams.get("api_key")).toBe("serpapi-key"); + expect(url.searchParams.get("gl")).toBe("us"); + expect(url.searchParams.get("hl")).toBe("en"); + + return Response.json({ + shopping_results: [ + { + position: 1, + title: "Romand Better Than Eyes Dry Mango Tulip", + source: "K-Beauty Store", + link: "https://store.example/products/romand", + price: "$13.40", + extracted_price: 13.4, + delivery: "$4.99 delivery", + rating: 4.7, + reviews: 128, + }, + ], + }); + }); + + const results = await searchProductOffers("dry mango tulip price", { + baseUrl: "https://serpapi.test/search.json", + serpapiApiKey: "serpapi-key", + fetchImpl: fetchMock as unknown as typeof fetch, + }); + + expect(results).toHaveLength(1); + expect(results[0]?.domain).toBe("store.example"); + expect(results[0]?.detectedPrice).toBe(13.4); + expect(results[0]?.detectedPriceText).toBe("$13.40"); + expect(results[0]?.engines).toEqual(["serpapi google shopping"]); +}); + +test("searchProductOffers applies result limits and handles empty results", async () => { + const fetchMock = mock(async () => + Response.json({ + results: [ + { title: "One", url: "https://one.example", content: "No price" }, + { title: "Two", url: "https://two.example", content: "$20.00" }, + ], + }), + ); + + const limited = await searchProductOffers("romand palette", { + provider: "searxng", + baseUrl: "https://searxng.test/", + fetchImpl: fetchMock as unknown as typeof fetch, + maxResults: 1, + }); + expect(limited).toHaveLength(1); + expect(limited[0]?.domain).toBe("two.example"); + + const emptyFetch = mock(async () => Response.json({ results: [] })); + const empty = await searchProductOffers("missing product", { + provider: "searxng", + baseUrl: "https://searxng.test/", + fetchImpl: emptyFetch as unknown as 
const DEFAULT_SEARXNG_URL = "https://searxng.nvictor.me/";
const DEFAULT_GOOGLE_CUSTOM_SEARCH_URL =
  "https://www.googleapis.com/customsearch/v1";
const DEFAULT_SERPAPI_URL = "https://serpapi.com/search.json";
const DEFAULT_TIMEOUT_MS = 10_000;
const DEFAULT_MAX_RESULTS = 10;

// A whole string that is "B" followed by nine digits or uppercase letters.
const ASIN_REGEX = /^B[0-9A-Z]{9}$/;
// Same shape as ASIN_REGEX, but finds every occurrence inside free text.
const ASIN_MATCH_REGEX = /\bB[0-9A-Z]{9}\b/gi;

// Price labels recognised in front of an amount. LABELED_PRICE_REGEX builds
// its alternation from this list, in this order.
const PRICE_LABELS = [
  "selling price",
  "sale price",
  "offer price",
  "current price",
  "our price",
  "list price",
  "price",
] as const;

// Regex alternation of accepted currency codes ("US\$" is a literal "US$").
const CURRENCY_CODES = "USD|US\\$|EUR|GBP|INR|CAD|AUD";
const CURRENCY_SYMBOLS = "$€£₹";

// Shared pattern fragments, so both price regexes stay in step.
const CURRENCY_SYMBOL_CLASS = escapeForCharClass(CURRENCY_SYMBOLS);
// 1-5 digits, optional ",ddd" groups, optional two-digit fraction.
const PRICE_AMOUNT_PATTERN = "[0-9]{1,5}(?:,[0-9]{3})*(?:\\.[0-9]{2})?";
// Optional code, then a currency symbol, then the amount ("US$ 12.99", "€18.75").
const SYMBOL_PRICE_PATTERN = `(?:${CURRENCY_CODES})?\\s*[${CURRENCY_SYMBOL_CLASS}]\\s*${PRICE_AMOUNT_PATTERN}`;
// Currency code in front of the amount ("USD 9.50").
const CODE_FIRST_PRICE_PATTERN = `(?:${CURRENCY_CODES})\\s*${PRICE_AMOUNT_PATTERN}`;
// Currency code after the amount ("22.10 USD").
const CODE_LAST_PRICE_PATTERN = `${PRICE_AMOUNT_PATTERN}\\s*(?:${CURRENCY_CODES})`;

// Group 1: the label. Group 2: the price text, which must start within 24
// characters (none of them a digit or currency symbol) after the label.
const LABELED_PRICE_REGEX = new RegExp(
  `\\b(${PRICE_LABELS.join("|")})\\b[^${CURRENCY_SYMBOL_CLASS}0-9]{0,24}(${SYMBOL_PRICE_PATTERN}|${CODE_FIRST_PRICE_PATTERN})`,
  "gi",
);
// Group 1: any price text, with or without a label in front of it.
const PRICE_REGEX = new RegExp(
  `(${SYMBOL_PRICE_PATTERN}|${CODE_FIRST_PRICE_PATTERN}|${CODE_LAST_PRICE_PATTERN})`,
  "gi",
);

/** One normalized, scored search hit returned by the offer search. */
export type SearxngOfferSearchResult = {
  asin?: string;
  query: string;
  title: string;
  url: string;
  domain: string;
  snippet: string;
  rank: number;
  score: number;
  matchedAsin?: string;
  detectedPrice?: number;
  detectedPriceCurrency?: string;
  detectedPriceLabel?: string;
  detectedPriceText?: string;
  engines: string[];
};
/** Options accepted by the offer-search entry points; every field is optional. */
export type SearxngSearchOptions = {
  provider?: "serpapi" | "google-custom-search" | "searxng";
  baseUrl?: string;
  googleApiKey?: string;
  googleCx?: string;
  serpapiApiKey?: string;
  timeoutMs?: number;
  maxResults?: number;
  page?: number;
  categories?: string;
  engines?: string;
  includeUnmatchedAsinResults?: boolean;
  fetchImpl?: typeof fetch;
};

/** Provider-independent hit, before normalization and scoring. */
type RawSearchResult = {
  title: string;
  url: string;
  snippet: string;
  engines: string[];
  rank: number;
};

/** The part of a SearXNG JSON response that this module reads. */
type JsonSearchResponse = {
  results?: Array<Record<string, unknown>>;
};

/** A price found in result text. */
type PriceDetection = {
  amount: number;
  currency: string;
  text: string;
  label?: string;
};

/**
 * Searches for offers of a single ASIN.
 *
 * @throws Error when `asin` is not a valid ASIN (see {@link normalizeAsin}).
 */
export async function searchAsinOffers(
  asin: string,
  options: SearxngSearchOptions = {},
): Promise<SearxngOfferSearchResult[]> {
  return searchProductOffers(normalizeAsin(asin), options);
}

/**
 * Runs a product search and returns normalized results, best score first.
 *
 * A query that is exactly one ASIN is expanded with price-related keywords,
 * and results that do not mention that ASIN are dropped unless
 * `includeUnmatchedAsinResults` is set. The provider defaults to SerpApi
 * Google Shopping when `options.provider` is not given.
 *
 * @throws Error when `query` is empty or only whitespace.
 */
export async function searchProductOffers(
  query: string,
  options: SearxngSearchOptions = {},
): Promise<SearxngOfferSearchResult[]> {
  const normalizedQuery = query.trim();
  if (!normalizedQuery) {
    throw new Error("Search query is required.");
  }

  const inferredAsin = getAsinQuery(normalizedQuery);
  const searchQuery = inferredAsin
    ? `${inferredAsin} price sale offer buy online`
    : normalizedQuery;
  const maxResults = positiveInteger(
    options.maxResults ?? readEnvInt("SEARXNG_MAX_RESULTS", DEFAULT_MAX_RESULTS),
    DEFAULT_MAX_RESULTS,
  );

  let rawResults: RawSearchResult[];
  switch (options.provider) {
    case "searxng":
      rawResults = await fetchSearxngResults(searchQuery, options);
      break;
    case "google-custom-search":
      rawResults = await fetchGoogleCustomSearchResults(searchQuery, {
        ...options,
        maxResults,
      });
      break;
    default:
      rawResults = await fetchSerpApiGoogleShoppingResults(searchQuery, {
        ...options,
        provider: "serpapi",
        maxResults,
      });
  }

  return rawResults
    .map((result) => normalizeResult(result, searchQuery, inferredAsin))
    .filter((result) => {
      if (!result.url) return false;
      if (!inferredAsin || options.includeUnmatchedAsinResults) return true;
      return result.matchedAsin === inferredAsin;
    })
    // Highest score first; the provider's own rank breaks ties.
    .sort((a, b) => b.score - a.score || a.rank - b.rank)
    .slice(0, maxResults);
}

/**
 * Trims and uppercases `value` and checks it against the ASIN pattern.
 *
 * @throws Error with the message `Invalid ASIN: <value>` when it does not match.
 */
export function normalizeAsin(value: string): string {
  const asin = value.trim().toUpperCase();
  if (!ASIN_REGEX.test(asin)) {
    throw new Error(`Invalid ASIN: ${value}`);
  }
  return asin;
}

/** Returns the uppercased ASIN when the whole query is one ASIN, else `undefined`. */
function getAsinQuery(value: string): string | undefined {
  const normalized = value.trim().toUpperCase();
  return ASIN_REGEX.test(normalized) ? normalized : undefined;
}
/**
 * Queries a SearXNG instance, preferring its JSON API and falling back to
 * parsing the HTML results page when the JSON attempt is not a successful
 * JSON response.
 *
 * @throws Error when the HTML fallback request is not successful either.
 */
async function fetchSearxngResults(
  query: string,
  options: SearxngSearchOptions,
): Promise<RawSearchResult[]> {
  const baseUrl = normalizeBaseUrl(
    options.baseUrl ?? Bun.env.SEARXNG_URL ?? DEFAULT_SEARXNG_URL,
  );
  const timeoutMs = positiveInteger(
    options.timeoutMs ?? readEnvInt("SEARXNG_TIMEOUT_MS", DEFAULT_TIMEOUT_MS),
    DEFAULT_TIMEOUT_MS,
  );
  const page = positiveInteger(options.page ?? 1, 1);
  const categories = options.categories ?? "general";
  const fetchImpl = options.fetchImpl ?? fetch;
  const requestQuery = applySearxngEngineBang(query, options.engines);
  const sharedParams = { categories, engines: options.engines, page };

  const jsonUrl = buildSearchUrl(baseUrl, requestQuery, {
    ...sharedParams,
    format: "json",
  });
  const jsonResponse = await fetchWithTimeout(fetchImpl, jsonUrl, timeoutMs);
  if (isJsonResponse(jsonResponse)) {
    const json = (await jsonResponse.json()) as JsonSearchResponse;
    return parseJsonResults(json);
  }
  // The JSON attempt is abandoned: discard its unread body before retrying.
  await jsonResponse.body?.cancel().catch(() => undefined);

  const htmlUrl = buildSearchUrl(baseUrl, requestQuery, sharedParams);
  const htmlResponse = await fetchWithTimeout(fetchImpl, htmlUrl, timeoutMs);
  if (!htmlResponse.ok) {
    throw new Error(
      `SearXNG search failed: status=${htmlResponse.status} url=${htmlUrl.toString()}`,
    );
  }

  return parseHtmlResults(await htmlResponse.text());
}

/**
 * Prefixes the query with a SearXNG `!bang` when exactly one engine with a
 * known shortcut is requested. Queries that already start with `!` are
 * returned unchanged.
 */
function applySearxngEngineBang(query: string, engines: string | undefined): string {
  if (!engines || query.trim().startsWith("!")) return query;

  const engineList = engines
    .split(",")
    .map((engine) => engine.trim().toLowerCase())
    .filter(Boolean);
  const [onlyEngine] = engineList;
  if (engineList.length !== 1 || onlyEngine === undefined) return query;

  const shortcut = searxngEngineShortcut(onlyEngine);
  return shortcut ? `!${shortcut} ${query}` : query;
}

/** Returns the bang shortcut for an engine name; only `google` ("go") is known. */
function searxngEngineShortcut(engine: string): string | undefined {
  if (engine === "google") return "go";
  return undefined;
}

/** True when the response is successful and declares an `application/json` body. */
function isJsonResponse(response: Response): boolean {
  const contentType = response.headers.get("content-type") ?? "";
  return response.ok && contentType.toLowerCase().includes("application/json");
}

/**
 * Calls `fetchImpl` and aborts the request if it has not settled within
 * `timeoutMs`.
 *
 * The timer is cleared as soon as `fetchImpl` settles, so reading the body
 * afterwards is not covered by the timeout.
 *
 * @throws Error describing the timeout when the request was aborted by the
 *   timer; any other failure from `fetchImpl` is rethrown unchanged.
 */
async function fetchWithTimeout(
  fetchImpl: typeof fetch,
  url: URL,
  timeoutMs: number,
): Promise<Response> {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    return await fetchImpl(url, {
      signal: controller.signal,
      headers: {
        accept: "application/json,text/html;q=0.9,*/*;q=0.8",
        "user-agent": "asin-check/1.0 (+https://searxng.nvictor.me/)",
      },
    });
  } catch (error) {
    if (controller.signal.aborted) {
      // The query string is left out on purpose: it can carry API keys.
      throw new Error(
        `Request timed out after ${timeoutMs}ms: ${url.origin}${url.pathname}`,
      );
    }
    throw error;
  } finally {
    clearTimeout(timer);
  }
}

/**
 * Builds the `search` URL under `baseUrl` with the query parameters
 * `q`, `categories`, `engines` (if given), `pageno` and `format` (if given).
 */
function buildSearchUrl(
  baseUrl: URL,
  query: string,
  params: { categories: string; engines?: string; page: number; format?: string },
): URL {
  const url = new URL("search", baseUrl);
  const search = url.searchParams;
  search.set("q", query);
  search.set("categories", params.categories);
  if (params.engines) search.set("engines", params.engines);
  search.set("pageno", String(params.page));
  if (params.format) search.set("format", params.format);
  return url;
}
/**
 * Queries the Google Custom Search JSON API.
 *
 * The key comes from `options.googleApiKey` or `GOOGLE_API_KEY`; the engine id
 * from `options.googleCx` or `GOOGLE_CSE_ID`, `GOOGLE_CX`,
 * `GOOGLE_SEARCH_ENGINE_ID`. At most 10 results are requested per call.
 *
 * @throws Error when the key or engine id is missing, or the response is not
 *   successful.
 */
async function fetchGoogleCustomSearchResults(
  query: string,
  options: SearxngSearchOptions,
): Promise<RawSearchResult[]> {
  const apiKey = options.googleApiKey ?? Bun.env.GOOGLE_API_KEY;
  const cx =
    options.googleCx ??
    Bun.env.GOOGLE_CSE_ID ??
    Bun.env.GOOGLE_CX ??
    Bun.env.GOOGLE_SEARCH_ENGINE_ID;
  if (!apiKey) {
    throw new Error("Missing GOOGLE_API_KEY for Google Custom Search.");
  }
  if (!cx) {
    throw new Error(
      "Missing Google Custom Search engine id. Set GOOGLE_CSE_ID, GOOGLE_CX, or GOOGLE_SEARCH_ENGINE_ID.",
    );
  }

  const timeoutMs = positiveInteger(
    options.timeoutMs ?? readEnvInt("SEARXNG_TIMEOUT_MS", DEFAULT_TIMEOUT_MS),
    DEFAULT_TIMEOUT_MS,
  );
  const page = positiveInteger(options.page ?? 1, 1);
  const num = Math.min(
    10,
    positiveInteger(options.maxResults ?? DEFAULT_MAX_RESULTS, DEFAULT_MAX_RESULTS),
  );
  const fetchImpl = options.fetchImpl ?? fetch;

  const url = new URL(options.baseUrl ?? DEFAULT_GOOGLE_CUSTOM_SEARCH_URL);
  url.searchParams.set("key", apiKey);
  url.searchParams.set("cx", cx);
  url.searchParams.set("q", query);
  url.searchParams.set("num", String(num));
  // `start` is the 1-based index of the first result on the requested page.
  url.searchParams.set("start", String((page - 1) * num + 1));

  const response = await fetchWithTimeout(fetchImpl, url, timeoutMs);
  if (!response.ok) {
    const body = await response.text().catch(() => "");
    throw new Error(
      `Google Custom Search failed: status=${response.status} ${body.slice(0, 300)}`,
    );
  }

  const json = (await response.json()) as GoogleCustomSearchResponse;
  return parseGoogleCustomSearchResults(json);
}

/** The part of a Google Custom Search response that this module reads. */
type GoogleCustomSearchResponse = {
  items?: GoogleCustomSearchItem[];
};

type GoogleCustomSearchItem = {
  title?: string;
  link?: string;
  snippet?: string;
  displayLink?: string;
  // Structured page data; only `offer`, `product` and `metatags` are read.
  pagemap?: Record<string, unknown>;
};

/** The part of a SerpApi Google Shopping response that this module reads. */
type SerpApiShoppingResponse = {
  shopping_results?: SerpApiShoppingResult[];
  inline_shopping_results?: SerpApiShoppingResult[];
  categorized_shopping_results?: Array<{
    shopping_results?: SerpApiShoppingResult[];
  }>;
  error?: string;
};

type SerpApiShoppingResult = {
  position?: number;
  title?: string;
  source?: string;
  link?: string;
  product_link?: string;
  serpapi_product_api?: string;
  price?: string;
  extracted_price?: number;
  old_price?: string;
  extracted_old_price?: number;
  delivery?: string;
  rating?: number;
  reviews?: number;
  snippet?: string;
};

/**
 * Queries SerpApi's `google_shopping` engine (google.com, `gl=us`, `hl=en`).
 *
 * The key comes from `options.serpapiApiKey` or `SERPAPI_API_KEY`.
 *
 * @throws Error when the key is missing, the response is not successful, or
 *   the response body carries an `error` field.
 */
async function fetchSerpApiGoogleShoppingResults(
  query: string,
  options: SearxngSearchOptions,
): Promise<RawSearchResult[]> {
  const apiKey = options.serpapiApiKey ?? Bun.env.SERPAPI_API_KEY;
  if (!apiKey) {
    throw new Error(
      "Missing SERPAPI_API_KEY. Google does not provide an official public Shopping-tab search API; use SerpApi's google_shopping API or another SERP provider.",
    );
  }

  const timeoutMs = positiveInteger(
    options.timeoutMs ?? readEnvInt("SEARXNG_TIMEOUT_MS", DEFAULT_TIMEOUT_MS),
    DEFAULT_TIMEOUT_MS,
  );
  const page = positiveInteger(options.page ?? 1, 1);
  const fetchImpl = options.fetchImpl ?? fetch;

  const url = new URL(options.baseUrl ?? DEFAULT_SERPAPI_URL);
  url.searchParams.set("engine", "google_shopping");
  url.searchParams.set("q", query);
  url.searchParams.set("api_key", apiKey);
  url.searchParams.set("google_domain", "google.com");
  url.searchParams.set("gl", "us");
  url.searchParams.set("hl", "en");
  // Pages advance the result offset in steps of 60.
  url.searchParams.set("start", String((page - 1) * 60));

  const response = await fetchWithTimeout(fetchImpl, url, timeoutMs);
  if (!response.ok) {
    const body = await response.text().catch(() => "");
    throw new Error(
      `SerpApi Google Shopping failed: status=${response.status} ${body.slice(0, 300)}`,
    );
  }

  const json = (await response.json()) as SerpApiShoppingResponse;
  if (json.error) {
    throw new Error(`SerpApi Google Shopping failed: ${json.error}`);
  }

  return parseSerpApiShoppingResults(json);
}
/**
 * Flattens the shopping lists of a SerpApi response into raw results.
 *
 * Items without any usable link are skipped. Price, merchant, delivery and
 * rating details are folded into the snippet as labelled text.
 */
function parseSerpApiShoppingResults(
  json: SerpApiShoppingResponse,
): RawSearchResult[] {
  const categorizedItems = (json.categorized_shopping_results ?? []).flatMap(
    (group) => group.shopping_results ?? [],
  );
  const items = [
    ...(json.shopping_results ?? []),
    ...(json.inline_shopping_results ?? []),
    ...categorizedItems,
  ];

  const parsed: RawSearchResult[] = [];
  items.forEach((item, index) => {
    const url =
      optionalString(item.link) ??
      optionalString(item.product_link) ??
      optionalString(item.serpapi_product_api);
    if (!url) return;

    const priceText = optionalString(item.price);
    const snippetParts: Array<string | undefined> = [
      priceText ? `offer price: ${priceText}` : undefined,
      optionalString(item.old_price)
        ? `list price: ${item.old_price}`
        : undefined,
      optionalString(item.source) ? `merchant: ${item.source}` : undefined,
      optionalString(item.delivery),
      optionalString(item.snippet),
      typeof item.rating === "number" ? `rating: ${item.rating}` : undefined,
      typeof item.reviews === "number" ? `reviews: ${item.reviews}` : undefined,
    ];

    parsed.push({
      title: optionalString(item.title) ?? "",
      url,
      snippet: snippetParts
        .filter((part): part is string => !!part)
        .join(" "),
      engines: ["serpapi google shopping"],
      // Fall back to the position in the combined list when none is reported.
      rank: item.position ?? index + 1,
    });
  });
  return parsed;
}

/**
 * Converts Google Custom Search items into raw results; items without a link
 * are skipped. Price metadata from the pagemap is appended to the snippet.
 */
function parseGoogleCustomSearchResults(
  json: GoogleCustomSearchResponse,
): RawSearchResult[] {
  const parsed: RawSearchResult[] = [];
  (json.items ?? []).forEach((item, index) => {
    const url = optionalString(item.link);
    if (!url) return;

    const metadataText = extractGoogleCustomSearchMetadataText(item);
    const snippetParts = [optionalString(item.snippet), metadataText];
    parsed.push({
      title: optionalString(item.title) ?? "",
      url,
      snippet: snippetParts
        .filter((part): part is string => !!part)
        .join(" "),
      engines: ["google custom search"],
      rank: index + 1,
    });
  });
  return parsed;
}

/**
 * Collects price text from the `offer`, `product` and `metatags` entries of an
 * item's pagemap, in that order, joined with spaces.
 */
function extractGoogleCustomSearchMetadataText(
  item: GoogleCustomSearchItem,
): string {
  const pagemap = item.pagemap ?? {};
  const chunks: string[] = [];

  const sections = [pagemap.offer, pagemap.product, pagemap.metatags];
  for (const section of sections) {
    for (const entry of readPagemapObjects(section)) {
      appendPriceMetadata(chunks, entry);
    }
  }

  return chunks.join(" ");
}
/**
 * Appends an `offer price: …` chunk when `value` carries a price under one of
 * the known keys; does nothing otherwise. The currency, when present, is
 * placed in front of the price.
 */
function appendPriceMetadata(chunks: string[], value: Record<string, unknown>): void {
  const price =
    optionalString(value.price) ??
    optionalString(value.lowprice) ??
    optionalString(value.highprice) ??
    optionalString(value["product:price:amount"]) ??
    optionalString(value["og:price:amount"]) ??
    optionalString(value["twitter:data1"]);
  if (!price) return;

  const currency =
    optionalString(value.pricecurrency) ??
    optionalString(value.priceCurrency) ??
    optionalString(value["product:price:currency"]) ??
    optionalString(value["og:price:currency"]);
  chunks.push(currency ? `offer price: ${currency} ${price}` : `offer price: ${price}`);
}

/** Returns the plain-object entries of `value`, or `[]` when it is not an array. */
function readPagemapObjects(value: unknown): Array<Record<string, unknown>> {
  if (!Array.isArray(value)) return [];
  return value.filter(
    (item): item is Record<string, unknown> =>
      item != null && typeof item === "object" && !Array.isArray(item),
  );
}

/** Converts a SearXNG JSON response into raw results; entries without a URL are skipped. */
function parseJsonResults(json: JsonSearchResponse): RawSearchResult[] {
  return (json.results ?? []).flatMap((result, index) => {
    const url = optionalString(result.url);
    if (!url) return [];
    return [
      {
        title: optionalString(result.title) ?? "",
        url,
        snippet: optionalString(result.content) ?? "",
        // A result may list its engines as `engines` or as a single `engine`.
        engines: normalizeEngines(result.engines ?? result.engine),
        rank: index + 1,
      },
    ];
  });
}

/**
 * Extracts results from a SearXNG HTML results page.
 *
 * Each `article.result` yields one result: the URL from `a.url_header` or the
 * `h3 a` link (whichever comes first), the title from `h3 a`, the snippet from
 * `p.content`, and engine names from `.engines span`. Articles without a URL
 * are skipped; rank follows document order of the kept results.
 */
async function parseHtmlResults(html: string): Promise<RawSearchResult[]> {
  type Draft = Omit<RawSearchResult, "rank">;
  type TextTarget = "title" | "snippet" | "engine";
  type TextChunk = { text: string; lastInTextNode: boolean };

  const results: RawSearchResult[] = [];
  let current: Draft | null = null;
  // Text of the text node currently being streamed; a node can arrive in
  // several chunks.
  let pendingText = "";

  // Stores the draft collected so far, if it has a URL.
  const finishCurrent = (): void => {
    if (current?.url) {
      results.push({ ...current, rank: results.length + 1 });
    }
    current = null;
    pendingText = "";
  };

  // Buffers chunks until the text node is complete, then adds the normalized
  // text to the draft. Every text node under the selector is collected, so a
  // title split by nested markup keeps all of its parts.
  const collectText = (target: TextTarget, chunk: TextChunk): void => {
    if (!current) return;
    pendingText += chunk.text;
    if (!chunk.lastInTextNode) return;

    const normalized = pendingText.replace(/\s+/g, " ").trim();
    pendingText = "";
    if (!normalized) return;

    if (target === "engine") {
      current.engines.push(normalized);
      return;
    }
    current[target] = appendWithSpace(current[target], normalized);
  };

  const takeHref = (element: { getAttribute(name: string): string | null }): void => {
    if (current && !current.url) {
      current.url = element.getAttribute("href") ?? "";
    }
  };

  const response = new HTMLRewriter()
    .on("article.result", {
      element() {
        // A new article closes the previous one; the last article is closed
        // after the transform has run.
        finishCurrent();
        current = { title: "", url: "", snippet: "", engines: [] };
      },
    })
    .on("article.result a.url_header", {
      element(element) {
        takeHref(element);
      },
    })
    .on("article.result h3 a", {
      element(element) {
        takeHref(element);
      },
      text(text) {
        collectText("title", text);
      },
    })
    .on("article.result p.content", {
      text(text) {
        collectText("snippet", text);
      },
    })
    .on("article.result .engines span", {
      text(text) {
        collectText("engine", text);
      },
    })
    .transform(new Response(html));

  // The handlers only run while the transformed body is being consumed.
  await response.text();
  finishCurrent();
  return results;
}
{ detectedPriceLabel: detectedPrice.label } + : {}), + detectedPriceText: detectedPrice.text, + } + : {}), + engines: dedupe(raw.engines.map(normalizeText).filter(Boolean)), + }; +} + +/** Heuristic relevance score: starts at 100 minus rank, adds 80 when the matched ASIN equals the requested asin, 40 when an ASIN matched but none was requested, 30 when a price was detected and 20 for a non-empty non-Amazon domain, and subtracts 15 for an Amazon domain. */function scoreResult(input: { + asin?: string; + matchedAsin?: string; + detectedPrice?: number; + domain: string; + rank: number; +}): number { + let score = 100 - input.rank; + if (input.asin && input.matchedAsin === input.asin) score += 80; + if (input.matchedAsin && !input.asin) score += 40; + if (input.detectedPrice != null) score += 30; + if (input.domain && !isAmazonDomain(input.domain)) score += 20; + if (isAmazonDomain(input.domain)) score -= 15; + return score; +} + +/** Parses value with the URL constructor (which throws on invalid input) and makes sure the pathname ends with a slash. */function normalizeBaseUrl(value: string): URL { + const url = new URL(value); + if (!url.pathname.endsWith("/")) { + url.pathname = `${url.pathname}/`; + } + return url; +} + +/** Returns value serialized by the URL constructor, or the trimmed input when it does not parse as a URL. */function normalizeUrl(value: string): string { + try { + return new URL(value).toString(); + } catch { + return value.trim(); + } +} + +/** Returns the lower-cased hostname of value with a leading www. removed, or an empty string when value does not parse as a URL. */function extractDomain(value: string): string { + try { + return new URL(value).hostname.replace(/^www\./i, "").toLowerCase(); + } catch { + return ""; + } +} + +/** True when amazon. occurs at the start of domain or directly after a dot (case-insensitive). */function isAmazonDomain(domain: string): boolean { + return /(^|\.)amazon\./i.test(domain); +} + +/** Upper-cases value and returns element 0 of its ASIN_MATCH_REGEX match, or undefined when there is no match. */function findMatchedAsin(value: string): string | undefined { + const match = value.toUpperCase().match(ASIN_MATCH_REGEX); + return match?.[0]; +} + +/** Returns the best labeled price (LABELED_PRICE_REGEX captures: group 1 label, group 2 price, ordered by comparePriceDetections) when one parses; otherwise the first parseable PRICE_REGEX capture, or undefined. */function detectPrice(value: string): PriceDetection | undefined { + const labeledCandidates = Array.from(value.matchAll(LABELED_PRICE_REGEX)) + .map((match) => parsePriceMatch(match[2], match[1])) + .filter((price): price is PriceDetection => !!price) + .sort(comparePriceDetections); + if (labeledCandidates[0]) return labeledCandidates[0]; + + const candidates = Array.from(value.matchAll(PRICE_REGEX)) + .map((match) => parsePriceMatch(match[1])) + .filter((price): price is PriceDetection => !!price); + return candidates[0]; +} + +/** Turns a raw price capture into a PriceDetection with amount, currency, normalized text and an optional lower-cased label; returns undefined when rawPrice is empty or no finite amount above zero can be read from it. */function parsePriceMatch( + rawPrice: string | undefined, + rawLabel?: 
string, +): PriceDetection | undefined { + if (!rawPrice) return undefined; + + const text = normalizeText(rawPrice); + const currency = detectCurrency(text); + const amountMatch = text.match(/[0-9]{1,5}(?:,[0-9]{3})*(?:\.[0-9]{2})?/); + if (!amountMatch?.[0]) return undefined; + + const amount = Number(amountMatch[0].replace(/,/g, "")); + if (!Number.isFinite(amount) || amount <= 0) return undefined; + + const label = rawLabel ? normalizeText(rawLabel).toLowerCase() : undefined; + return { + amount, + currency, + text, + ...(label ? { label } : {}), + }; +} + +/** Sort comparator that orders detections by ascending priceLabelRank of their label. */function comparePriceDetections(a: PriceDetection, b: PriceDetection): number { + return priceLabelRank(a.label) - priceLabelRank(b.label); +} + +/** Position of label in PRICE_LABELS; a missing or unlisted label gets PRICE_LABELS.length, i.e. sorts last. */function priceLabelRank(label: string | undefined): number { + if (!label) return PRICE_LABELS.length; + const index = PRICE_LABELS.indexOf(label as (typeof PRICE_LABELS)[number]); + return index === -1 ? PRICE_LABELS.length : index; +} + +/** Returns EUR, GBP, INR, CAD or AUD for the first code or symbol test that matches, in that order, and USD when none does. */function detectCurrency(value: string): string { + if (/\b(EUR)\b|€/i.test(value)) return "EUR"; + if (/\b(GBP)\b|£/i.test(value)) return "GBP"; + if (/\b(INR)\b|₹/i.test(value)) return "INR"; + if (/\b(CAD)\b/i.test(value)) return "CAD"; + if (/\b(AUD)\b/i.test(value)) return "AUD"; + return "USD"; +} + +/** Prefixes every hyphen, backslash, closing bracket and caret in value with a backslash. */function escapeForCharClass(value: string): string { + return value.replace(/[-\\\]^]/g, "\\$&"); +} + +/** For arrays, stringifies each element and drops empty strings; otherwise wraps the optionalString of value in a one-element array, or returns an empty array when that is undefined. */function normalizeEngines(value: unknown): string[] { + if (Array.isArray(value)) { + return value.map(String).filter(Boolean); + } + const engine = optionalString(value); + return engine ? [engine] : []; +} + +/** Returns the trimmed String(value), or undefined when value is null or undefined or the trimmed text is empty. */function optionalString(value: unknown): string | undefined { + if (value == null) return undefined; + const text = String(value).trim(); + return text ? text : undefined; +} + +/** Passes value through decodeHtmlEntities, collapses whitespace runs to single spaces and trims the ends. */function normalizeText(value: string): string { + return decodeHtmlEntities(value).replace(/\s+/g, " ").trim(); +} + +/** Joins left and right with a single space, or returns right alone when left is empty. */function appendWithSpace(left: string, right: string): string { + return left ? 
`${left} ${right}` : right; +} + +/** Applies six global replacements to value in sequence. NOTE(review): in this copy every pattern looks identical to its replacement, which suggests the entity names were decoded during extraction - confirm the real patterns against the source file. */function decodeHtmlEntities(value: string): string { + return value + .replace(/&/g, "&") + .replace(/"/g, '"') + .replace(/'/g, "'") + .replace(/</g, "<") + .replace(/>/g, ">") + .replace(/ /g, " "); +} + +/** Returns the distinct values in first-seen order. */function dedupe(values: string[]): string[] { + return Array.from(new Set(values)); +} + +/** Converts Bun.env[key] with Number and returns it when finite, else fallback. NOTE(review): despite the name no integer check happens here, and an empty string converts to 0 rather than falling back - confirm callers expect that. */function readEnvInt(key: string, fallback: number): number { + const parsed = Number(Bun.env[key]); + return Number.isFinite(parsed) ? parsed : fallback; +} + +/** Returns value when it is an integer greater than zero, otherwise fallback. */function positiveInteger(value: number, fallback: number): number { + return Number.isInteger(value) && value > 0 ? value : fallback; +}