/** SearXNG instance used when neither `options.baseUrl` nor `SEARXNG_URL` is provided. */
const DEFAULT_SEARXNG_URL = "https://searxng.nvictor.me/";
/** Endpoint of the Google Custom Search JSON API. */
const DEFAULT_GOOGLE_CUSTOM_SEARCH_URL = "https://www.googleapis.com/customsearch/v1";
/** Endpoint of the SerpApi JSON search API. */
const DEFAULT_SERPAPI_URL = "https://serpapi.com/search.json";
const DEFAULT_TIMEOUT_MS = 10_000;
const DEFAULT_MAX_RESULTS = 10;

/** Matches a string that is exactly one ASIN-shaped token (already upper-cased). */
const ASIN_REGEX = /^B[0-9A-Z]{9}$/;
/** Finds every ASIN-shaped token inside free text. */
const ASIN_MATCH_REGEX = /\bB[0-9A-Z]{9}\b/gi;

/**
 * Price labels recognised in result text, ordered from most to least
 * preferred. The order is used both as the regex alternation order and as the
 * ranking applied when several labelled prices are found.
 */
const PRICE_LABELS = [
  "selling price",
  "sale price",
  "offer price",
  "current price",
  "our price",
  "list price",
  "price",
] as const;

const CURRENCY_CODES = "USD|US\\$|EUR|GBP|INR|CAD|AUD";
const CURRENCY_SYMBOLS = "$€£₹";

// Shared regex source fragments, so the labelled and unlabelled price patterns
// (and the amount extraction in parsePriceMatch) cannot drift apart.
const AMOUNT_PATTERN = "[0-9]{1,5}(?:,[0-9]{3})*(?:\\.[0-9]{2})?";
const CURRENCY_SYMBOL_CLASS = escapeForCharClass(CURRENCY_SYMBOLS);
/** Optional currency code, then a currency symbol, then the amount (e.g. `US$ 19.99`, `€5`). */
const SYMBOL_PRICE_PATTERN = `(?:${CURRENCY_CODES})?\\s*[${CURRENCY_SYMBOL_CLASS}]\\s*${AMOUNT_PATTERN}`;
/** Currency code followed by the amount (e.g. `USD 19.99`). */
const CODE_PRICE_PATTERN = `(?:${CURRENCY_CODES})\\s*${AMOUNT_PATTERN}`;
/** Amount followed by a currency code (e.g. `19.99 USD`). */
const TRAILING_CODE_PRICE_PATTERN = `${AMOUNT_PATTERN}\\s*(?:${CURRENCY_CODES})`;

/** Group 1 is the price label, group 2 the price text that follows it within 24 characters. */
const LABELED_PRICE_REGEX = new RegExp(
  `\\b(${PRICE_LABELS.join("|")})\\b[^${CURRENCY_SYMBOL_CLASS}0-9]{0,24}(${SYMBOL_PRICE_PATTERN}|${CODE_PRICE_PATTERN})`,
  "gi",
);
/** Group 1 is any price-looking text, with or without a label. */
const PRICE_REGEX = new RegExp(
  `(${SYMBOL_PRICE_PATTERN}|${CODE_PRICE_PATTERN}|${TRAILING_CODE_PRICE_PATTERN})`,
  "gi",
);
const AMOUNT_REGEX = new RegExp(AMOUNT_PATTERN);

/** Replacement text for the named HTML entities that `decodeHtmlEntities` understands. */
const NAMED_HTML_ENTITIES: Readonly<Record<string, string>> = {
  amp: "&",
  quot: '"',
  apos: "'",
  lt: "<",
  gt: ">",
  nbsp: " ",
};
/** Group 1: decimal code point, group 2: hexadecimal code point, group 3: entity name. */
const HTML_ENTITY_REGEX =
  /&(?:#([0-9]{1,7})|#[xX]([0-9a-fA-F]{1,6})|(amp|quot|apos|lt|gt|nbsp));/g;

/** A normalized, scored search hit returned by the public search functions. */
export type SearxngOfferSearchResult = {
  /** ASIN inferred from the query; present only when the query itself was an ASIN. */
  asin?: string;
  /** Query string that was actually sent to the provider. */
  query: string;
  title: string;
  url: string;
  /** Lower-cased hostname of `url` without a leading `www.`; empty when `url` is not parseable. */
  domain: string;
  snippet: string;
  /** Rank assigned while parsing the provider response. */
  rank: number;
  /** Heuristic relevance score computed by `scoreResult`; higher sorts first. */
  score: number;
  /** ASIN-shaped token found in the title, snippet or URL. */
  matchedAsin?: string;
  detectedPrice?: number;
  detectedPriceCurrency?: string;
  detectedPriceLabel?: string;
  detectedPriceText?: string;
  engines: string[];
};

/** Options accepted by `searchAsinOffers` and `searchProductOffers`. */
export type SearxngSearchOptions = {
  /** Backend to query. Any value other than "searxng" or "google-custom-search" selects SerpApi. */
  provider?: "serpapi" | "google-custom-search" | "searxng";
  /** Overrides the provider's default endpoint. */
  baseUrl?: string;
  googleApiKey?: string;
  googleCx?: string;
  serpapiApiKey?: string;
  timeoutMs?: number;
  maxResults?: number;
  /** 1-based result page. */
  page?: number;
  categories?: string;
  engines?: string;
  /** When the query is an ASIN, also keep results whose matched ASIN differs or is missing. */
  includeUnmatchedAsinResults?: boolean;
  /** Fetch implementation to use instead of the global `fetch`. */
  fetchImpl?: typeof fetch;
};

/** Provider-independent result shape produced by the response parsers. */
type RawSearchResult = {
  title: string;
  url: string;
  snippet: string;
  engines: string[];
  rank: number;
};

type JsonSearchResponse = {
  results?: Array<Record<string, unknown>>;
};

type PriceDetection = {
  amount: number;
  currency: string;
  text: string;
  label?: string;
};

type GoogleCustomSearchResponse = {
  items?: GoogleCustomSearchItem[];
};

type GoogleCustomSearchItem = {
  title?: string;
  link?: string;
  snippet?: string;
  displayLink?: string;
  pagemap?: Record<string, unknown>;
};

type SerpApiShoppingResponse = {
  shopping_results?: SerpApiShoppingResult[];
  inline_shopping_results?: SerpApiShoppingResult[];
  categorized_shopping_results?: Array<{
    shopping_results?: SerpApiShoppingResult[];
  }>;
  error?: string;
};

type SerpApiShoppingResult = {
  position?: number;
  title?: string;
  source?: string;
  link?: string;
  product_link?: string;
  serpapi_product_api?: string;
  price?: string;
  extracted_price?: number;
  old_price?: string;
  extracted_old_price?: number;
  delivery?: string;
  rating?: number;
  reviews?: number;
  snippet?: string;
};

/**
 * Searches for offers of a single product identified by its ASIN.
 *
 * @param asin - ASIN to look up; surrounding whitespace and letter case are ignored.
 * @param options - Provider selection and request tuning.
 * @returns Scored results, best first.
 * @throws Error when `asin` is not a valid ASIN, or when the provider request fails.
 */
export async function searchAsinOffers(
  asin: string,
  options: SearxngSearchOptions = {},
): Promise<SearxngOfferSearchResult[]> {
  return searchProductOffers(normalizeAsin(asin), options);
}

/**
 * Runs a product search against the configured provider and returns
 * normalized, scored results.
 *
 * When the query is exactly an ASIN, shopping keywords are appended to it and,
 * unless `includeUnmatchedAsinResults` is set, only results that mention that
 * ASIN are kept.
 *
 * @param query - Free-text query or an ASIN.
 * @param options - Provider selection and request tuning.
 * @returns At most `maxResults` results ordered by descending score, then ascending rank.
 * @throws Error when the query is blank, credentials are missing, or the provider request fails.
 */
export async function searchProductOffers(
  query: string,
  options: SearxngSearchOptions = {},
): Promise<SearxngOfferSearchResult[]> {
  const normalizedQuery = query.trim();
  if (!normalizedQuery) {
    throw new Error("Search query is required.");
  }

  const inferredAsin = getAsinQuery(normalizedQuery);
  const providerQuery = inferredAsin
    ? `${inferredAsin} price sale offer buy online`
    : normalizedQuery;
  const maxResults = positiveInteger(
    options.maxResults ?? readEnvInt("SEARXNG_MAX_RESULTS", DEFAULT_MAX_RESULTS),
    DEFAULT_MAX_RESULTS,
  );

  let rawResults: RawSearchResult[];
  switch (options.provider) {
    case "searxng":
      rawResults = await fetchSearxngResults(providerQuery, options);
      break;
    case "google-custom-search":
      rawResults = await fetchGoogleCustomSearchResults(providerQuery, {
        ...options,
        maxResults,
      });
      break;
    default:
      // SerpApi is the default backend when no (or an unknown) provider is given.
      rawResults = await fetchSerpApiGoogleShoppingResults(providerQuery, {
        ...options,
        provider: "serpapi",
        maxResults,
      });
  }

  return rawResults
    .map((result) => normalizeResult(result, providerQuery, inferredAsin))
    .filter((result) => {
      if (!result.url) return false;
      if (!inferredAsin || options.includeUnmatchedAsinResults) return true;
      return result.matchedAsin === inferredAsin;
    })
    .sort((a, b) => b.score - a.score || a.rank - b.rank)
    .slice(0, maxResults);
}

/**
 * Trims and upper-cases an ASIN and validates its shape.
 *
 * @throws Error when the value is not `B` followed by nine letters or digits.
 */
export function normalizeAsin(value: string): string {
  const asin = value.trim().toUpperCase();
  if (!ASIN_REGEX.test(asin)) {
    throw new Error(`Invalid ASIN: ${value}`);
  }
  return asin;
}

/** Returns the upper-cased ASIN when the whole query is ASIN-shaped, otherwise `undefined`. */
function getAsinQuery(value: string): string | undefined {
  const normalized = value.trim().toUpperCase();
  return ASIN_REGEX.test(normalized) ? normalized : undefined;
}

/**
 * Queries a SearXNG instance, first asking for JSON and then falling back to
 * the HTML results page when the JSON request is not answered with an OK JSON
 * response.
 *
 * @throws Error when the HTML fallback request is not OK.
 */
async function fetchSearxngResults(
  query: string,
  options: SearxngSearchOptions,
): Promise<RawSearchResult[]> {
  const baseUrl = normalizeBaseUrl(
    options.baseUrl ?? Bun.env.SEARXNG_URL ?? DEFAULT_SEARXNG_URL,
  );
  const timeoutMs = positiveInteger(
    options.timeoutMs ?? readEnvInt("SEARXNG_TIMEOUT_MS", DEFAULT_TIMEOUT_MS),
    DEFAULT_TIMEOUT_MS,
  );
  const page = positiveInteger(options.page ?? 1, 1);
  const categories = options.categories ?? "general";
  const fetchImpl = options.fetchImpl ?? fetch;
  const requestQuery = applySearxngEngineBang(query, options.engines);
  const sharedParams = { categories, engines: options.engines, page };

  const jsonUrl = buildSearchUrl(baseUrl, requestQuery, {
    ...sharedParams,
    format: "json",
  });
  const jsonResponse = await fetchWithTimeout(fetchImpl, jsonUrl, timeoutMs);
  if (isJsonResponse(jsonResponse)) {
    const json = (await jsonResponse.json()) as JsonSearchResponse;
    return parseJsonResults(json);
  }

  const htmlUrl = buildSearchUrl(baseUrl, requestQuery, sharedParams);
  const htmlResponse = await fetchWithTimeout(fetchImpl, htmlUrl, timeoutMs);
  if (!htmlResponse.ok) {
    throw new Error(
      `SearXNG search failed: status=${htmlResponse.status} url=${htmlUrl.toString()}`,
    );
  }
  return parseHtmlResults(await htmlResponse.text());
}

/**
 * Prefixes the query with a SearXNG `!shortcut` when exactly one engine with a
 * known shortcut is requested and the query does not already start with `!`.
 */
function applySearxngEngineBang(query: string, engines: string | undefined): string {
  if (!engines || query.trim().startsWith("!")) return query;

  const [onlyEngine, ...otherEngines] = engines
    .split(",")
    .map((engine) => engine.trim().toLowerCase())
    .filter(Boolean);
  if (!onlyEngine || otherEngines.length > 0) return query;

  const shortcut = searxngEngineShortcut(onlyEngine);
  return shortcut ? `!${shortcut} ${query}` : query;
}

/** Maps an engine name to its bang shortcut; only `google` is known here. */
function searxngEngineShortcut(engine: string): string | undefined {
  if (engine === "google") return "go";
  return undefined;
}

/** True when the response is OK and its content type mentions `application/json`. */
function isJsonResponse(response: Response): boolean {
  const contentType = response.headers.get("content-type") ?? "";
  return response.ok && contentType.toLowerCase().includes("application/json");
}

/**
 * Performs a GET request that is aborted once `timeoutMs` has elapsed.
 * The timer is cleared as soon as the fetch promise settles.
 */
async function fetchWithTimeout(
  fetchImpl: typeof fetch,
  url: URL,
  timeoutMs: number,
): Promise<Response> {
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), timeoutMs);
  try {
    return await fetchImpl(url, {
      signal: controller.signal,
      headers: {
        accept: "application/json,text/html;q=0.9,*/*;q=0.8",
        "user-agent": "asin-check/1.0 (+https://searxng.nvictor.me/)",
      },
    });
  } finally {
    clearTimeout(timeout);
  }
}

/** Builds the `search` URL below `baseUrl` with the given query parameters. */
function buildSearchUrl(
  baseUrl: URL,
  query: string,
  params: { categories: string; engines?: string; page: number; format?: string },
): URL {
  const url = new URL("search", baseUrl);
  url.searchParams.set("q", query);
  url.searchParams.set("categories", params.categories);
  if (params.engines) {
    url.searchParams.set("engines", params.engines);
  }
  url.searchParams.set("pageno", String(params.page));
  if (params.format) {
    url.searchParams.set("format", params.format);
  }
  return url;
}

/**
 * Queries the Google Custom Search JSON API.
 *
 * @throws Error when the API key or engine id is missing, or the response is not OK.
 */
async function fetchGoogleCustomSearchResults(
  query: string,
  options: SearxngSearchOptions,
): Promise<RawSearchResult[]> {
  const apiKey = options.googleApiKey ?? Bun.env.GOOGLE_API_KEY;
  const cx =
    options.googleCx ??
    Bun.env.GOOGLE_CSE_ID ??
    Bun.env.GOOGLE_CX ??
    Bun.env.GOOGLE_SEARCH_ENGINE_ID;
  if (!apiKey) {
    throw new Error("Missing GOOGLE_API_KEY for Google Custom Search.");
  }
  if (!cx) {
    throw new Error(
      "Missing Google Custom Search engine id. Set GOOGLE_CSE_ID, GOOGLE_CX, or GOOGLE_SEARCH_ENGINE_ID.",
    );
  }

  const timeoutMs = positiveInteger(
    options.timeoutMs ?? readEnvInt("SEARXNG_TIMEOUT_MS", DEFAULT_TIMEOUT_MS),
    DEFAULT_TIMEOUT_MS,
  );
  const page = positiveInteger(options.page ?? 1, 1);
  // The request never asks for more than 10 results per page.
  const num = Math.min(
    10,
    positiveInteger(options.maxResults ?? DEFAULT_MAX_RESULTS, DEFAULT_MAX_RESULTS),
  );
  const fetchImpl = options.fetchImpl ?? fetch;

  const url = new URL(options.baseUrl ?? DEFAULT_GOOGLE_CUSTOM_SEARCH_URL);
  url.searchParams.set("key", apiKey);
  url.searchParams.set("cx", cx);
  url.searchParams.set("q", query);
  url.searchParams.set("num", String(num));
  // `start` is the 1-based index of the first result of the requested page.
  url.searchParams.set("start", String((page - 1) * num + 1));

  const response = await fetchWithTimeout(fetchImpl, url, timeoutMs);
  if (!response.ok) {
    const body = await response.text().catch(() => "");
    throw new Error(
      `Google Custom Search failed: status=${response.status} ${body.slice(0, 300)}`,
    );
  }
  const json = (await response.json()) as GoogleCustomSearchResponse;
  return parseGoogleCustomSearchResults(json);
}

/**
 * Queries SerpApi's `google_shopping` engine.
 *
 * @throws Error when the API key is missing, the response is not OK, or the
 *   payload carries an `error` field.
 */
async function fetchSerpApiGoogleShoppingResults(
  query: string,
  options: SearxngSearchOptions,
): Promise<RawSearchResult[]> {
  const apiKey = options.serpapiApiKey ?? Bun.env.SERPAPI_API_KEY;
  if (!apiKey) {
    throw new Error(
      "Missing SERPAPI_API_KEY. Google does not provide an official public Shopping-tab search API; use SerpApi's google_shopping API or another SERP provider.",
    );
  }

  const timeoutMs = positiveInteger(
    options.timeoutMs ?? readEnvInt("SEARXNG_TIMEOUT_MS", DEFAULT_TIMEOUT_MS),
    DEFAULT_TIMEOUT_MS,
  );
  const page = positiveInteger(options.page ?? 1, 1);
  const fetchImpl = options.fetchImpl ?? fetch;

  const url = new URL(options.baseUrl ?? DEFAULT_SERPAPI_URL);
  url.searchParams.set("engine", "google_shopping");
  url.searchParams.set("q", query);
  url.searchParams.set("api_key", apiKey);
  url.searchParams.set("google_domain", "google.com");
  url.searchParams.set("gl", "us");
  url.searchParams.set("hl", "en");
  // The result offset advances by 60 for each page.
  url.searchParams.set("start", String((page - 1) * 60));

  const response = await fetchWithTimeout(fetchImpl, url, timeoutMs);
  if (!response.ok) {
    const body = await response.text().catch(() => "");
    throw new Error(
      `SerpApi Google Shopping failed: status=${response.status} ${body.slice(0, 300)}`,
    );
  }
  const json = (await response.json()) as SerpApiShoppingResponse;
  if (json.error) {
    throw new Error(`SerpApi Google Shopping failed: ${json.error}`);
  }
  return parseSerpApiShoppingResults(json);
}

/**
 * Flattens every shopping-result list of a SerpApi response into raw results.
 * Price, merchant, delivery, rating and review data are folded into the
 * snippet as labelled text so that the generic price detection can read them.
 */
function parseSerpApiShoppingResults(
  json: SerpApiShoppingResponse,
): RawSearchResult[] {
  const items = [
    ...(json.shopping_results ?? []),
    ...(json.inline_shopping_results ?? []),
    ...(json.categorized_shopping_results ?? []).flatMap(
      (category) => category.shopping_results ?? [],
    ),
  ];

  return items.flatMap((item, index) => {
    const url =
      optionalString(item.link) ??
      optionalString(item.product_link) ??
      optionalString(item.serpapi_product_api);
    if (!url) return [];

    const priceText = optionalString(item.price);
    const snippet = [
      priceText ? `offer price: ${priceText}` : undefined,
      optionalString(item.old_price) ? `list price: ${item.old_price}` : undefined,
      optionalString(item.source) ? `merchant: ${item.source}` : undefined,
      optionalString(item.delivery),
      optionalString(item.snippet),
      typeof item.rating === "number" ? `rating: ${item.rating}` : undefined,
      typeof item.reviews === "number" ? `reviews: ${item.reviews}` : undefined,
    ]
      .filter((value): value is string => !!value)
      .join(" ");

    return [
      {
        title: optionalString(item.title) ?? "",
        url,
        snippet,
        engines: ["serpapi google shopping"],
        rank: item.position ?? index + 1,
      },
    ];
  });
}

/** Converts Google Custom Search items into raw results, skipping items without a link. */
function parseGoogleCustomSearchResults(
  json: GoogleCustomSearchResponse,
): RawSearchResult[] {
  return (json.items ?? []).flatMap((item, index) => {
    const url = optionalString(item.link);
    if (!url) return [];

    const metadataText = extractGoogleCustomSearchMetadataText(item);
    return [
      {
        title: optionalString(item.title) ?? "",
        url,
        snippet: [optionalString(item.snippet), metadataText]
          .filter((value): value is string => !!value)
          .join(" "),
        engines: ["google custom search"],
        rank: index + 1,
      },
    ];
  });
}

/** Collects price hints from the `offer`, `product` and `metatags` entries of an item's pagemap. */
function extractGoogleCustomSearchMetadataText(
  item: GoogleCustomSearchItem,
): string {
  const pagemap = item.pagemap ?? {};
  const chunks: string[] = [];
  for (const key of ["offer", "product", "metatags"]) {
    for (const entry of readPagemapObjects(pagemap[key])) {
      appendPriceMetadata(chunks, entry);
    }
  }
  return chunks.join(" ");
}

/**
 * Appends an `offer price: …` chunk when the metadata object carries a price
 * under one of the known keys; the currency is included when one is found.
 */
function appendPriceMetadata(chunks: string[], value: Record<string, unknown>): void {
  const price =
    optionalString(value.price) ??
    optionalString(value.lowprice) ??
    optionalString(value.highprice) ??
    optionalString(value["product:price:amount"]) ??
    optionalString(value["og:price:amount"]) ??
    optionalString(value["twitter:data1"]);
  if (!price) return;

  const currency =
    optionalString(value.pricecurrency) ??
    optionalString(value.priceCurrency) ??
    optionalString(value["product:price:currency"]) ??
    optionalString(value["og:price:currency"]);
  chunks.push(currency ? `offer price: ${currency} ${price}` : `offer price: ${price}`);
}

/** Returns the plain-object members of a pagemap entry, or an empty list when it is not an array. */
function readPagemapObjects(value: unknown): Array<Record<string, unknown>> {
  if (!Array.isArray(value)) return [];
  return value.filter(
    (item): item is Record<string, unknown> =>
      item != null && typeof item === "object" && !Array.isArray(item),
  );
}

/** Converts a SearXNG JSON payload into raw results, skipping entries without a URL. */
function parseJsonResults(json: JsonSearchResponse): RawSearchResult[] {
  return (json.results ?? []).flatMap((result, index) => {
    const url = optionalString(result.url);
    if (!url) return [];
    return [
      {
        title: optionalString(result.title) ?? "",
        url,
        snippet: optionalString(result.content) ?? "",
        engines: normalizeEngines(result.engines ?? result.engine),
        rank: index + 1,
      },
    ];
  });
}

/**
 * Extracts results from a SearXNG HTML results page using `HTMLRewriter`.
 *
 * Each `article.result` element becomes one result. The URL comes from
 * `a.url_header` or, failing that, the title link; title, snippet and engine
 * names come from the text of `h3 a`, `p.content` and `.engines span`.
 * Articles without a URL are dropped.
 */
async function parseHtmlResults(html: string): Promise<RawSearchResult[]> {
  type Draft = {
    title: string;
    url: string;
    snippet: string;
    engines: string[];
  };
  type TextTarget = "title" | "snippet" | "engine";
  type TextChunk = { text: string; lastInTextNode: boolean };

  const results: RawSearchResult[] = [];
  let current: Draft | null = null;
  // A text node may arrive split into several chunks; they are buffered here
  // until the final chunk so that a word is never broken by a chunk boundary.
  let pendingText = "";

  const collectText = (target: TextTarget, chunk: TextChunk): void => {
    pendingText += chunk.text;
    if (!chunk.lastInTextNode) return;

    const normalized = pendingText.replace(/\s+/g, " ").trim();
    pendingText = "";
    if (!current || !normalized) return;

    if (target === "engine") {
      current.engines.push(normalized);
      return;
    }
    current[target] = appendWithSpace(current[target], normalized);
  };

  const response = new HTMLRewriter()
    .on("article.result", {
      element(element) {
        current = { title: "", url: "", snippet: "", engines: [] };
        pendingText = "";
        // `onEndTag` is looked up defensively and skipped when it is absent.
        const onEndTag = (element as unknown as {
          onEndTag?: (handler: () => void) => void;
        }).onEndTag;
        onEndTag?.call(element, () => {
          if (current?.url) {
            results.push({ ...current, rank: results.length + 1 });
          }
          current = null;
          pendingText = "";
        });
      },
    })
    .on("article.result a.url_header", {
      element(element) {
        if (current && !current.url) {
          current.url = element.getAttribute("href") ?? "";
        }
      },
    })
    .on("article.result h3 a", {
      element(element) {
        if (current && !current.url) {
          current.url = element.getAttribute("href") ?? "";
        }
      },
      text(text) {
        // The target is passed on every chunk so that text following a nested
        // element inside the link is still appended to the title.
        collectText("title", text);
      },
    })
    .on("article.result p.content", {
      text(text) {
        collectText("snippet", text);
      },
    })
    .on("article.result .engines span", {
      text(text) {
        collectText("engine", text);
      },
    })
    .transform(new Response(html));

  // Reading the transformed body to the end is what drives the handlers above.
  await response.text();
  return results;
}

/**
 * Turns a raw provider result into the public result shape: cleans the text,
 * derives the domain, detects an ASIN and a price, and computes the score.
 */
function normalizeResult(
  raw: RawSearchResult,
  query: string,
  asin?: string,
): SearxngOfferSearchResult {
  const url = normalizeUrl(raw.url);
  const domain = extractDomain(url);
  const title = normalizeText(raw.title);
  const snippet = normalizeText(raw.snippet);
  const matchedAsin = findMatchedAsin(`${title} ${snippet} ${url}`, asin);
  const detectedPrice = detectPrice(`${title} ${snippet}`);
  const score = scoreResult({
    asin,
    matchedAsin,
    detectedPrice: detectedPrice?.amount,
    domain,
    rank: raw.rank,
  });

  const priceFields = detectedPrice
    ? {
        detectedPrice: detectedPrice.amount,
        detectedPriceCurrency: detectedPrice.currency,
        ...(detectedPrice.label ? { detectedPriceLabel: detectedPrice.label } : {}),
        detectedPriceText: detectedPrice.text,
      }
    : {};

  return {
    ...(asin ? { asin } : {}),
    query,
    title,
    url,
    domain,
    snippet,
    rank: raw.rank,
    score,
    ...(matchedAsin ? { matchedAsin } : {}),
    ...priceFields,
    engines: dedupe(
      raw.engines.map((engine) => normalizeText(engine)).filter(Boolean),
    ),
  };
}

/**
 * Heuristic relevance score. Starts at `100 - rank`, then adds 80 for a match
 * of the requested ASIN, 40 for any ASIN when none was requested, 30 for a
 * detected price and 20 for a non-Amazon domain; Amazon domains lose 15.
 */
function scoreResult(input: {
  asin?: string;
  matchedAsin?: string;
  detectedPrice?: number;
  domain: string;
  rank: number;
}): number {
  const onAmazon = isAmazonDomain(input.domain);
  let score = 100 - input.rank;
  if (input.asin && input.matchedAsin === input.asin) score += 80;
  if (input.matchedAsin && !input.asin) score += 40;
  if (input.detectedPrice != null) score += 30;
  if (input.domain && !onAmazon) score += 20;
  if (onAmazon) score -= 15;
  return score;
}

/** Parses a base URL and makes its path end with `/` so relative paths resolve beneath it. */
function normalizeBaseUrl(value: string): URL {
  const url = new URL(value);
  if (!url.pathname.endsWith("/")) {
    url.pathname = `${url.pathname}/`;
  }
  return url;
}

/** Returns the serialized form of a parseable URL, otherwise the trimmed input. */
function normalizeUrl(value: string): string {
  try {
    return new URL(value).toString();
  } catch {
    return value.trim();
  }
}

/** Lower-cased hostname without a leading `www.`; empty when the URL cannot be parsed. */
function extractDomain(value: string): string {
  try {
    return new URL(value).hostname.replace(/^www\./i, "").toLowerCase();
  } catch {
    return "";
  }
}

/** True for `amazon.<tld>` and any of its subdomains. */
function isAmazonDomain(domain: string): boolean {
  return /(^|\.)amazon\./i.test(domain);
}

/**
 * Finds an ASIN-shaped token in the text (matching is case-insensitive).
 *
 * Ordinary ten-letter words starting with "B" are ASIN-shaped as well, so the
 * first match is not always the right one. The candidates are therefore
 * ranked: the expected ASIN if it occurs, otherwise the first candidate that
 * contains a digit, otherwise the first candidate.
 *
 * @param value - Text to scan.
 * @param expectedAsin - ASIN the caller is looking for, if any.
 * @returns The selected candidate in upper case, or `undefined` when there is none.
 */
function findMatchedAsin(value: string, expectedAsin?: string): string | undefined {
  const candidates = value.toUpperCase().match(ASIN_MATCH_REGEX);
  if (!candidates) return undefined;

  const expected = expectedAsin?.toUpperCase();
  if (expected && candidates.includes(expected)) return expected;

  return candidates.find((candidate) => /[0-9]/.test(candidate)) ?? candidates[0];
}

/**
 * Detects a price in the text. Labelled prices win over unlabelled ones and
 * are ranked by the order of `PRICE_LABELS`; without a labelled price the
 * first price-looking text is used.
 */
function detectPrice(value: string): PriceDetection | undefined {
  const labelled = Array.from(value.matchAll(LABELED_PRICE_REGEX))
    .map((match) => parsePriceMatch(match[2], match[1]))
    .filter((price): price is PriceDetection => price !== undefined)
    .sort(comparePriceDetections);
  const bestLabelled = labelled[0];
  if (bestLabelled) return bestLabelled;

  return Array.from(value.matchAll(PRICE_REGEX))
    .map((match) => parsePriceMatch(match[1]))
    .find((price): price is PriceDetection => price !== undefined);
}

/** Parses matched price text into amount and currency; non-positive amounts are rejected. */
function parsePriceMatch(
  rawPrice: string | undefined,
  rawLabel?: string,
): PriceDetection | undefined {
  if (!rawPrice) return undefined;

  const text = normalizeText(rawPrice);
  const currency = detectCurrency(text);
  const amountText = text.match(AMOUNT_REGEX)?.[0];
  if (!amountText) return undefined;

  const amount = Number(amountText.replace(/,/g, ""));
  if (!Number.isFinite(amount) || amount <= 0) return undefined;

  const label = rawLabel ? normalizeText(rawLabel).toLowerCase() : undefined;
  return {
    amount,
    currency,
    text,
    ...(label ? { label } : {}),
  };
}

/** Orders detections by label preference; the most preferred label sorts first. */
function comparePriceDetections(a: PriceDetection, b: PriceDetection): number {
  return priceLabelRank(a.label) - priceLabelRank(b.label);
}

/** Position of the label in `PRICE_LABELS`; unknown or missing labels rank last. */
function priceLabelRank(label: string | undefined): number {
  if (!label) return PRICE_LABELS.length;
  const index = PRICE_LABELS.indexOf(label as (typeof PRICE_LABELS)[number]);
  return index === -1 ? PRICE_LABELS.length : index;
}

/** Maps currency codes and symbols in the text to a currency code; defaults to `USD`. */
function detectCurrency(value: string): string {
  if (/\b(EUR)\b|€/i.test(value)) return "EUR";
  if (/\b(GBP)\b|£/i.test(value)) return "GBP";
  if (/\b(INR)\b|₹/i.test(value)) return "INR";
  if (/\b(CAD)\b/i.test(value)) return "CAD";
  if (/\b(AUD)\b/i.test(value)) return "AUD";
  return "USD";
}

/** Escapes the characters that are special inside a regex character class. */
function escapeForCharClass(value: string): string {
  return value.replace(/[-\\\]^]/g, "\\$&");
}

/** Accepts either a list of engines or a single engine and returns a list of non-empty names. */
function normalizeEngines(value: unknown): string[] {
  if (Array.isArray(value)) {
    return value.map(String).filter(Boolean);
  }
  const engine = optionalString(value);
  return engine ? [engine] : [];
}

/** Stringifies and trims a value; `null`, `undefined` and blank text become `undefined`. */
function optionalString(value: unknown): string | undefined {
  if (value == null) return undefined;
  const text = String(value).trim();
  return text ? text : undefined;
}

/** Decodes HTML entities, collapses whitespace runs to a single space and trims. */
function normalizeText(value: string): string {
  return decodeHtmlEntities(value).replace(/\s+/g, " ").trim();
}

/** Joins two fragments with one space, or returns `right` alone when `left` is empty. */
function appendWithSpace(left: string, right: string): string {
  return left ? `${left} ${right}` : right;
}

/**
 * Decodes the named entities `&amp;`, `&quot;`, `&apos;`, `&lt;`, `&gt;` and
 * `&nbsp;` as well as decimal and hexadecimal character references.
 *
 * The text is decoded in a single pass, so an escaped entity such as
 * `&amp;lt;` becomes `&lt;` and is not decoded a second time. References to
 * invalid code points are left untouched.
 */
function decodeHtmlEntities(value: string): string {
  return value.replace(
    HTML_ENTITY_REGEX,
    (entity: string, decimal?: string, hex?: string, name?: string) => {
      if (name) return NAMED_HTML_ENTITIES[name] ?? entity;

      const codePoint = decimal
        ? Number.parseInt(decimal, 10)
        : Number.parseInt(hex ?? "", 16);
      const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
      if (
        !Number.isInteger(codePoint) ||
        codePoint <= 0 ||
        codePoint > 0x10ffff ||
        isSurrogate
      ) {
        return entity;
      }
      return String.fromCodePoint(codePoint);
    },
  );
}

/** Removes duplicates while keeping first-seen order. */
function dedupe(values: string[]): string[] {
  return Array.from(new Set(values));
}

/** Reads a numeric environment variable; unset or non-numeric values yield `fallback`. */
function readEnvInt(key: string, fallback: number): number {
  const parsed = Number(Bun.env[key]);
  return Number.isFinite(parsed) ? parsed : fallback;
}

/** Returns `value` when it is an integer greater than zero, otherwise `fallback`. */
function positiveInteger(value: number, fallback: number): number {
  return Number.isInteger(value) && value > 0 ? value : fallback;
}