diff --git a/apps/sim/app/api/tools/brightdata/dataset/route.ts b/apps/sim/app/api/tools/brightdata/dataset/route.ts
new file mode 100644
index 0000000000..13a83e3e81
--- /dev/null
+++ b/apps/sim/app/api/tools/brightdata/dataset/route.ts
@@ -0,0 +1,169 @@
+import { randomUUID } from 'crypto'
+import { createLogger } from '@sim/logger'
+import { NextResponse } from 'next/server'
+
+const logger = createLogger('BrightDataDatasetAPI')
+
+export const maxDuration = 600
+
+/** Delay between two snapshot polls, in milliseconds. */
+const POLL_INTERVAL_MS = 1000
+
+/**
+ * Wall-clock polling budget. It stays below `maxDuration` so the handler can
+ * still answer with its own 504 instead of being terminated mid-poll.
+ */
+const POLL_BUDGET_MS = (maxDuration - 30) * 1000
+
+/** Snapshot statuses that mean the snapshot is not ready yet. */
+const PENDING_STATUSES = ['running', 'building', 'starting']
+
+/** Parses a body as JSON, falling back to the raw text when it is not JSON. */
+function parseBody(text: string): unknown {
+  try {
+    return JSON.parse(text)
+  } catch {
+    return text
+  }
+}
+
+/** Reads `key` from `payload` when it is a non-null object. */
+function readField(payload: unknown, key: string): unknown {
+  return typeof payload === 'object' && payload !== null
+    ? (payload as Record<string, unknown>)[key]
+    : undefined
+}
+
+/**
+ * Builds an error message from an upstream payload. Non-string `error` values
+ * are serialized so they do not collapse to "[object Object]", and plain-text
+ * bodies are used (truncated) before falling back to the status text.
+ */
+function describeError(payload: unknown, statusText: string, fallback: string): string {
+  const detail = readField(payload, 'error')
+  if (typeof detail === 'string') return detail || statusText || fallback
+  if (detail !== undefined && detail !== null) return JSON.stringify(detail)
+  if (typeof payload === 'string' && payload.trim()) return payload.trim().slice(0, 500)
+  return statusText || fallback
+}
+
+/**
+ * Triggers a Bright Data dataset collection and polls the resulting snapshot
+ * until it is ready.
+ *
+ * Expects a JSON object body with string `datasetId` and `apiToken`; every other
+ * field is forwarded to Bright Data as the dataset input record.
+ *
+ * @returns `{ data, snapshot_at }` on success; 400 for a malformed request or a
+ * rejected snapshot, the upstream status when the trigger fails, 504 when the
+ * polling budget runs out, and 500 for unexpected failures.
+ */
+export async function POST(request: Request) {
+  const requestId = randomUUID().slice(0, 8)
+
+  try {
+    // A malformed or non-object body is a client error, not a server failure.
+    const body = parseBody(await request.text())
+    if (typeof body !== 'object' || body === null || Array.isArray(body)) {
+      return NextResponse.json({ error: 'Request body must be a JSON object' }, { status: 400 })
+    }
+
+    const { datasetId, apiToken, ...params } = body as Record<string, unknown>
+
+    if (typeof datasetId !== 'string' || !datasetId || typeof apiToken !== 'string' || !apiToken) {
+      return NextResponse.json({ error: 'Missing required parameters' }, { status: 400 })
+    }
+
+    logger.info(`[${requestId}] Triggering dataset`, { datasetId })
+
+    const authHeaders = { Authorization: `Bearer ${apiToken}` }
+
+    const triggerResponse = await fetch(
+      `https://api.brightdata.com/datasets/v3/trigger?dataset_id=${encodeURIComponent(
+        datasetId
+      )}&include_errors=true`,
+      {
+        method: 'POST',
+        headers: { ...authHeaders, 'Content-Type': 'application/json' },
+        body: JSON.stringify([params]),
+      }
+    )
+    const triggerPayload = parseBody(await triggerResponse.text())
+
+    if (!triggerResponse.ok) {
+      const errorMessage = describeError(
+        triggerPayload,
+        triggerResponse.statusText,
+        'Dataset trigger failed'
+      )
+
+      logger.error(`[${requestId}] Dataset trigger failed`, {
+        datasetId,
+        status: triggerResponse.status,
+        error: errorMessage,
+      })
+
+      return NextResponse.json({ error: errorMessage }, { status: triggerResponse.status })
+    }
+
+    const snapshotId = String(readField(triggerPayload, 'snapshot_id') ?? '')
+
+    if (!snapshotId) {
+      logger.error(`[${requestId}] Dataset trigger missing snapshot ID`, { datasetId })
+      return NextResponse.json({ error: 'No snapshot ID returned from request' }, { status: 500 })
+    }
+
+    logger.info(`[${requestId}] Dataset triggered`, { datasetId, snapshotId })
+
+    // The ID comes from an upstream payload, so escape it before putting it in a path.
+    const snapshotUrl = `https://api.brightdata.com/datasets/v3/snapshot/${encodeURIComponent(
+      snapshotId
+    )}?format=json`
+    const deadline = Date.now() + POLL_BUDGET_MS
+
+    while (Date.now() < deadline) {
+      const snapshotResponse = await fetch(snapshotUrl, { method: 'GET', headers: authHeaders })
+      const snapshotPayload = parseBody(await snapshotResponse.text())
+
+      // A 400 is a definitive rejection; any other failure is retried until the deadline.
+      if (snapshotResponse.status === 400) {
+        const errorMessage = describeError(
+          snapshotPayload,
+          snapshotResponse.statusText,
+          'Dataset snapshot fetch failed'
+        )
+
+        logger.error(`[${requestId}] Dataset snapshot fetch failed`, {
+          datasetId,
+          snapshotId,
+          status: snapshotResponse.status,
+          error: errorMessage,
+        })
+
+        return NextResponse.json({ error: errorMessage }, { status: snapshotResponse.status })
+      }
+
+      const status = String(readField(snapshotPayload, 'status') ?? '')
+
+      if (snapshotResponse.ok && !PENDING_STATUSES.includes(status)) {
+        const snapshotAt = String(readField(snapshotPayload, 'snapshot_at') ?? '')
+
+        logger.info(`[${requestId}] Dataset snapshot received`, { datasetId, snapshotId })
+
+        return NextResponse.json({
+          data: snapshotPayload,
+          snapshot_at: snapshotAt || undefined,
+        })
+      }
+
+      await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS))
+    }
+
+    logger.error(`[${requestId}] Dataset snapshot timed out`, { datasetId, snapshotId })
+    return NextResponse.json({ error: 'Timeout waiting for dataset snapshot' }, { status: 504 })
+  } catch (error) {
+    const message = error instanceof Error ? error.message : 'Dataset fetch failed'
+    logger.error(`[${requestId}] Dataset fetch failed`, { error: message })
+    return NextResponse.json({ error: message }, { status: 500 })
+  }
+}
diff --git a/apps/sim/app/api/tools/brightdata/scrape-markdown/route.ts b/apps/sim/app/api/tools/brightdata/scrape-markdown/route.ts
new file mode 100644
index 0000000000..bd7a61b688
--- /dev/null
+++ b/apps/sim/app/api/tools/brightdata/scrape-markdown/route.ts
@@ -0,0 +1,112 @@
+import { randomUUID } from 'crypto'
+import { createLogger } from '@sim/logger'
+import { NextResponse } from 'next/server'
+
+const logger = createLogger('BrightDataScrapeMarkdownAPI')
+
+/** Parses a body as JSON, falling back to the raw text when it is not JSON. */
+function parseBody(text: string): unknown {
+  try {
+    return JSON.parse(text)
+  } catch {
+    return text
+  }
+}
+
+/** Reads `key` from `payload` when it is a non-null object. */
+function readField(payload: unknown, key: string): unknown {
+  return typeof payload === 'object' && payload !== null
+    ? (payload as Record<string, unknown>)[key]
+    : undefined
+}
+
+/**
+ * Builds an error message from an upstream payload. Non-string `error` values
+ * are serialized so they do not collapse to "[object Object]", and plain-text
+ * bodies are used (truncated) before falling back to the status text.
+ */
+function describeError(payload: unknown, statusText: string, fallback: string): string {
+  const detail = readField(payload, 'error')
+  if (typeof detail === 'string') return detail || statusText || fallback
+  if (detail !== undefined && detail !== null) return JSON.stringify(detail)
+  if (typeof payload === 'string' && payload.trim()) return payload.trim().slice(0, 500)
+  return statusText || fallback
+}
+
+/**
+ * Requests a single URL from the Bright Data request API in markdown format.
+ *
+ * Expects a JSON object body with string `url` and `apiToken`, plus an optional
+ * `unlockerZone` (defaults to `mcp_unlocker`).
+ *
+ * @returns `{ markdown, url, title }` on success; 400 for a malformed request, the
+ * upstream status when Bright Data rejects the call, and 500 for unexpected failures.
+ */
+export async function POST(request: Request) {
+  const requestId = randomUUID().slice(0, 8)
+
+  try {
+    // Unparseable or non-object bodies yield no fields and end in the 400 below.
+    const body = parseBody(await request.text())
+    const url = readField(body, 'url')
+    const apiToken = readField(body, 'apiToken')
+    const unlockerZone = readField(body, 'unlockerZone')
+
+    if (typeof url !== 'string' || !url || typeof apiToken !== 'string' || !apiToken) {
+      return NextResponse.json({ error: 'Missing required parameters' }, { status: 400 })
+    }
+
+    logger.info(`[${requestId}] Scraping URL as markdown`, { url })
+
+    const response = await fetch('https://api.brightdata.com/request', {
+      method: 'POST',
+      headers: {
+        Authorization: `Bearer ${apiToken}`,
+        'Content-Type': 'application/json',
+      },
+      body: JSON.stringify({
+        zone: typeof unlockerZone === 'string' && unlockerZone ? unlockerZone : 'mcp_unlocker',
+        url,
+        format: 'raw',
+        data_format: 'markdown',
+      }),
+    })
+    const payload = parseBody(await response.text())
+
+    if (!response.ok) {
+      const errorMessage = describeError(payload, response.statusText, 'Scraping failed')
+
+      logger.error(`[${requestId}] Scraping failed`, {
+        url,
+        status: response.status,
+        error: errorMessage,
+      })
+
+      return NextResponse.json({ error: errorMessage }, { status: response.status })
+    }
+
+    // A non-JSON body is the markdown itself; JSON objects may carry `markdown` and `title`.
+    let markdown: string
+    if (typeof payload === 'string') {
+      markdown = payload
+    } else if (typeof payload === 'object' && payload !== null && 'markdown' in payload) {
+      markdown = String(readField(payload, 'markdown') ?? '')
+    } else {
+      markdown = JSON.stringify(payload)
+    }
+
+    const title = String(readField(payload, 'title') ?? '')
+
+    logger.info(`[${requestId}] Scraping completed`, { url })
+
+    return NextResponse.json({
+      markdown,
+      url,
+      title: title || undefined,
+    })
+  } catch (error) {
+    const message = error instanceof Error ? error.message : 'Scraping failed'
+    logger.error(`[${requestId}] Scraping failed`, { error: message })
+    return NextResponse.json({ error: message }, { status: 500 })
+  }
+}
diff --git a/apps/sim/app/api/tools/brightdata/search-engine/route.ts b/apps/sim/app/api/tools/brightdata/search-engine/route.ts
new file mode 100644
index 0000000000..f6b39551f0
--- /dev/null
+++ b/apps/sim/app/api/tools/brightdata/search-engine/route.ts
@@ -0,0 +1,129 @@
+import { randomUUID } from 'crypto'
+import { createLogger } from '@sim/logger'
+import { NextResponse } from 'next/server'
+
+const logger = createLogger('BrightDataSearchEngineAPI')
+
+/** A single normalized organic search result. */
+type SearchResult = { title: string; url: string; snippet: string }
+
+/** Parses a body as JSON, falling back to the raw text when it is not JSON. */
+function parseBody(text: string): unknown {
+  try {
+    return JSON.parse(text)
+  } catch {
+    return text
+  }
+}
+
+/** Reads `key` from `payload` when it is a non-null object. */
+function readField(payload: unknown, key: string): unknown {
+  return typeof payload === 'object' && payload !== null
+    ? (payload as Record<string, unknown>)[key]
+    : undefined
+}
+
+/**
+ * Builds an error message from an upstream payload. Non-string `error` values
+ * are serialized so they do not collapse to "[object Object]", and plain-text
+ * bodies are used (truncated) before falling back to the status text.
+ */
+function describeError(payload: unknown, statusText: string, fallback: string): string {
+  const detail = readField(payload, 'error')
+  if (typeof detail === 'string') return detail || statusText || fallback
+  if (detail !== undefined && detail !== null) return JSON.stringify(detail)
+  if (typeof payload === 'string' && payload.trim()) return payload.trim().slice(0, 500)
+  return statusText || fallback
+}
+
+/**
+ * Converts the `maxResults` input into a positive integer limit.
+ *
+ * @returns the limit, or `undefined` (no limit) for missing, non-numeric, or < 1 values.
+ */
+function toResultLimit(value: unknown): number | undefined {
+  const parsed = typeof value === 'string' ? Number(value) : value
+  if (typeof parsed !== 'number' || !Number.isFinite(parsed) || parsed < 1) return undefined
+  return Math.floor(parsed)
+}
+
+/** Maps one raw organic entry to a result; entries without a title or link are dropped. */
+function toSearchResult(entry: unknown): SearchResult[] {
+  const title = readField(entry, 'title')
+  const url = readField(entry, 'link')
+  const snippet = readField(entry, 'description')
+  if (typeof title !== 'string' || !title || typeof url !== 'string' || !url) return []
+  return [{ title, url, snippet: typeof snippet === 'string' ? snippet : '' }]
+}
+
+/**
+ * Requests a Google search page from the Bright Data request API and normalizes it.
+ *
+ * Expects a JSON object body with string `query` and `apiToken`, plus optional
+ * `unlockerZone` (defaults to `mcp_unlocker`) and `maxResults` (number or numeric string).
+ *
+ * @returns `{ results }` on success; 400 for a malformed request, the upstream status
+ * when Bright Data rejects the call, and 500 for unexpected failures.
+ */
+export async function POST(request: Request) {
+  const requestId = randomUUID().slice(0, 8)
+
+  try {
+    // Unparseable or non-object bodies yield no fields and end in the 400 below.
+    const body = parseBody(await request.text())
+    const query = readField(body, 'query')
+    const apiToken = readField(body, 'apiToken')
+    const unlockerZone = readField(body, 'unlockerZone')
+    const maxResults = toResultLimit(readField(body, 'maxResults'))
+
+    if (typeof query !== 'string' || !query || typeof apiToken !== 'string' || !apiToken) {
+      return NextResponse.json({ error: 'Missing required parameters' }, { status: 400 })
+    }
+
+    logger.info(`[${requestId}] Searching`, { query, maxResults })
+
+    const searchUrl = `https://www.google.com/search?q=${encodeURIComponent(query)}&start=0&brd_json=1`
+
+    const response = await fetch('https://api.brightdata.com/request', {
+      method: 'POST',
+      headers: {
+        Authorization: `Bearer ${apiToken}`,
+        'Content-Type': 'application/json',
+      },
+      body: JSON.stringify({
+        zone: typeof unlockerZone === 'string' && unlockerZone ? unlockerZone : 'mcp_unlocker',
+        url: searchUrl,
+        format: 'raw',
+        data_format: 'parsed_light',
+      }),
+    })
+    const payload = parseBody(await response.text())
+
+    if (!response.ok) {
+      const errorMessage = describeError(payload, response.statusText, 'Search failed')
+
+      logger.error(`[${requestId}] Search failed`, {
+        query,
+        status: response.status,
+        error: errorMessage,
+      })
+
+      return NextResponse.json({ error: errorMessage }, { status: response.status })
+    }
+
+    const organic = readField(payload, 'organic')
+    const normalizedResults = Array.isArray(organic) ? organic.flatMap(toSearchResult) : []
+    const results =
+      maxResults === undefined ? normalizedResults : normalizedResults.slice(0, maxResults)
+
+    logger.info(`[${requestId}] Search completed`, { resultCount: results.length })
+
+    return NextResponse.json({
+      results,
+    })
+  } catch (error) {
+    const message = error instanceof Error ? error.message : 'Search failed'
+    logger.error(`[${requestId}] Search failed`, { error: message })
+    return NextResponse.json({ error: message }, { status: 500 })
+  }
+}
diff --git a/apps/sim/blocks/blocks/brightdata.ts b/apps/sim/blocks/blocks/brightdata.ts
new file mode 100644
index 0000000000..568071bcb9
--- /dev/null
+++ b/apps/sim/blocks/blocks/brightdata.ts
@@ -0,0 +1,375 @@
+import { BrightDataIcon } from '@/components/icons'
+import type { BlockConfig } from '@/blocks/types'
+import { AuthMode } from '@/blocks/types'
+import type { BrightDataResponse } from '@/tools/brightdata/types'
+
+/** Maps each dataset operation id to the id of the tool that executes it. */
+const DATASET_TOOL_MAP: Record<string, string> = {
+  dataset_amazon_product: 'brightdata_dataset_amazon_product',
+  dataset_amazon_product_reviews: 'brightdata_dataset_amazon_product_reviews',
+  dataset_amazon_product_search: 'brightdata_dataset_amazon_product_search',
+  dataset_walmart_product: 'brightdata_dataset_walmart_product',
+  dataset_walmart_seller: 'brightdata_dataset_walmart_seller',
+  dataset_ebay_product: 'brightdata_dataset_ebay_product',
+  dataset_homedepot_products: 'brightdata_dataset_homedepot_products',
+  dataset_zara_products: 'brightdata_dataset_zara_products',
+  dataset_etsy_products: 'brightdata_dataset_etsy_products',
+  dataset_bestbuy_products: 'brightdata_dataset_bestbuy_products',
+  dataset_linkedin_person_profile: 'brightdata_dataset_linkedin_person_profile',
+  dataset_linkedin_company_profile: 'brightdata_dataset_linkedin_company_profile',
+  dataset_linkedin_job_listings: 'brightdata_dataset_linkedin_job_listings',
+  dataset_linkedin_posts: 'brightdata_dataset_linkedin_posts',
+  dataset_linkedin_people_search: 'brightdata_dataset_linkedin_people_search',
+  dataset_crunchbase_company: 'brightdata_dataset_crunchbase_company',
+  dataset_zoominfo_company_profile: 'brightdata_dataset_zoominfo_company_profile',
+  dataset_instagram_profiles: 'brightdata_dataset_instagram_profiles',
+  dataset_instagram_posts: 'brightdata_dataset_instagram_posts',
+  dataset_instagram_reels: 'brightdata_dataset_instagram_reels',
+  dataset_instagram_comments: 'brightdata_dataset_instagram_comments',
+  dataset_facebook_posts: 'brightdata_dataset_facebook_posts',
+  dataset_facebook_marketplace_listings: 'brightdata_dataset_facebook_marketplace_listings',
+  dataset_facebook_company_reviews: 'brightdata_dataset_facebook_company_reviews',
+  dataset_facebook_events: 'brightdata_dataset_facebook_events',
+  dataset_tiktok_profiles: 'brightdata_dataset_tiktok_profiles',
+  dataset_tiktok_posts: 'brightdata_dataset_tiktok_posts',
+  dataset_tiktok_shop: 'brightdata_dataset_tiktok_shop',
+  dataset_tiktok_comments: 'brightdata_dataset_tiktok_comments',
+  dataset_google_maps_reviews: 'brightdata_dataset_google_maps_reviews',
+  dataset_google_shopping: 'brightdata_dataset_google_shopping',
+  dataset_google_play_store: 'brightdata_dataset_google_play_store',
+  dataset_apple_app_store: 'brightdata_dataset_apple_app_store',
+  dataset_reuter_news: 'brightdata_dataset_reuter_news',
+  dataset_github_repository_file: 'brightdata_dataset_github_repository_file',
+  dataset_yahoo_finance_business: 'brightdata_dataset_yahoo_finance_business',
+  dataset_x_posts: 'brightdata_dataset_x_posts',
+  dataset_zillow_properties_listing: 'brightdata_dataset_zillow_properties_listing',
+  dataset_booking_hotel_listings: 'brightdata_dataset_booking_hotel_listings',
+  dataset_youtube_profiles: 'brightdata_dataset_youtube_profiles',
+  dataset_youtube_comments: 'brightdata_dataset_youtube_comments',
+  dataset_reddit_posts: 'brightdata_dataset_reddit_posts',
+  dataset_youtube_videos: 'brightdata_dataset_youtube_videos',
+  dataset_npm_package: 'brightdata_dataset_npm_package',
+  dataset_pypi_package: 'brightdata_dataset_pypi_package',
+}
+
+/**
+ * Block configuration for Bright Data. A single `operation` dropdown selects
+ * between markdown scraping, search, and the dataset tools; the remaining
+ * inputs are shown conditionally per operation.
+ */
+export const BrightDataBlock: BlockConfig<BrightDataResponse> = {
+  type: 'brightdata',
+  name: 'Bright Data',
+  description: 'Web scraping, search, and dataset access',
+  authMode: AuthMode.ApiKey,
+  longDescription:
+    "Access Bright Data's web data collection tools including web scraping, search, and datasets.",
+  docsLink: 'https://docs.sim.ai/tools/brightdata',
+  category: 'tools',
+  bgColor: '#3D7FFC',
+  icon: BrightDataIcon,
+
+  subBlocks: [
+    {
+      id: 'operation',
+      title: 'Operation',
+      type: 'dropdown',
+      required: true,
+      options: [
+        { label: 'Scrape as Markdown', id: 'scrape_markdown' },
+        { label: 'Search Engine', id: 'search_engine' },
+        { label: 'Amazon Product Dataset', id: 'dataset_amazon_product' },
+        { label: 'Amazon Product Reviews Dataset', id: 'dataset_amazon_product_reviews' },
+        { label: 'Amazon Product Search Dataset', id: 'dataset_amazon_product_search' },
+        { label: 'Walmart Product Dataset', id: 'dataset_walmart_product' },
+        { label: 'Walmart Seller Dataset', id: 'dataset_walmart_seller' },
+        { label: 'Ebay Product Dataset', id: 'dataset_ebay_product' },
+        { label: 'Homedepot Products Dataset', id: 'dataset_homedepot_products' },
+        { label: 'Zara Products Dataset', id: 'dataset_zara_products' },
+        { label: 'Etsy Products Dataset', id: 'dataset_etsy_products' },
+        { label: 'Bestbuy Products Dataset', id: 'dataset_bestbuy_products' },
+        { label: 'Linkedin Person Profile Dataset', id: 'dataset_linkedin_person_profile' },
+        { label: 'Linkedin Company Profile Dataset', id: 'dataset_linkedin_company_profile' },
+        { label: 'Linkedin Job Listings Dataset', id: 'dataset_linkedin_job_listings' },
+        { label: 'Linkedin Posts Dataset', id: 'dataset_linkedin_posts' },
+        { label: 'Linkedin People Search Dataset', id: 'dataset_linkedin_people_search' },
+        { label: 'Crunchbase Company Dataset', id: 'dataset_crunchbase_company' },
+        { label: 'Zoominfo Company Profile Dataset', id: 'dataset_zoominfo_company_profile' },
+        { label: 'Instagram Profiles Dataset', id: 'dataset_instagram_profiles' },
+        { label: 'Instagram Posts Dataset', id: 'dataset_instagram_posts' },
+        { label: 'Instagram Reels Dataset', id: 'dataset_instagram_reels' },
+        { label: 'Instagram Comments Dataset', id: 'dataset_instagram_comments' },
+        { label: 'Facebook Posts Dataset', id: 'dataset_facebook_posts' },
+        {
+          label: 'Facebook Marketplace Listings Dataset',
+          id: 'dataset_facebook_marketplace_listings',
+        },
+        { label: 'Facebook Company Reviews Dataset', id: 'dataset_facebook_company_reviews' },
+        { label: 'Facebook Events Dataset', id: 'dataset_facebook_events' },
+        { label: 'Tiktok Profiles Dataset', id: 'dataset_tiktok_profiles' },
+        { label: 'Tiktok Posts Dataset', id: 'dataset_tiktok_posts' },
+        { label: 'Tiktok Shop Dataset', id: 'dataset_tiktok_shop' },
+        { label: 'Tiktok Comments Dataset', id: 'dataset_tiktok_comments' },
+        { label: 'Google Maps Reviews Dataset', id: 'dataset_google_maps_reviews' },
+        { label: 'Google Shopping Dataset', id: 'dataset_google_shopping' },
+        { label: 'Google Play Store Dataset', id: 'dataset_google_play_store' },
+        { label: 'Apple App Store Dataset', id: 'dataset_apple_app_store' },
+        { label: 'Reuter News Dataset', id: 'dataset_reuter_news' },
+        { label: 'Github Repository File Dataset', id: 'dataset_github_repository_file' },
+        { label: 'Yahoo Finance Business Dataset', id: 'dataset_yahoo_finance_business' },
+        { label: 'X Posts Dataset', id: 'dataset_x_posts' },
+        { label: 'Zillow Properties Listing Dataset', id: 'dataset_zillow_properties_listing' },
+        { label: 'Booking Hotel Listings Dataset', id: 'dataset_booking_hotel_listings' },
+        { label: 'Youtube Profiles Dataset', id: 'dataset_youtube_profiles' },
+        { label: 'Youtube Comments Dataset', id: 'dataset_youtube_comments' },
+        { label: 'Reddit Posts Dataset', id: 'dataset_reddit_posts' },
+        { label: 'Youtube Videos Dataset', id: 'dataset_youtube_videos' },
+        { label: 'Npm Package Dataset', id: 'dataset_npm_package' },
+        { label: 'Pypi Package Dataset', id: 'dataset_pypi_package' },
+      ],
+      value: () => 'scrape_markdown',
+    },
+    {
+      id: 'url',
+      title: 'URL',
+      type: 'short-input',
+      placeholder: 'https://example.com',
+      condition: { field: 'operation', value: 'scrape_markdown' },
+      required: true,
+    },
+    {
+      id: 'query',
+      title: 'Search Query',
+      type: 'short-input',
+      placeholder: 'Enter search query',
+      condition: { field: 'operation', value: 'search_engine' },
+      required: true,
+    },
+    {
+      id: 'maxResults',
+      title: 'Max Results',
+      type: 'short-input',
+      placeholder: '10',
+      condition: { field: 'operation', value: 'search_engine' },
+    },
+    // Reuses the `url` id for dataset operations. The npm and PyPI datasets are
+    // absent from this list; they take `package_name` instead.
+    {
+      id: 'url',
+      title: 'Dataset URL',
+      type: 'short-input',
+      placeholder: 'https://example.com',
+      condition: {
+        field: 'operation',
+        value: [
+          'dataset_amazon_product',
+          'dataset_amazon_product_reviews',
+          'dataset_amazon_product_search',
+          'dataset_walmart_product',
+          'dataset_walmart_seller',
+          'dataset_ebay_product',
+          'dataset_homedepot_products',
+          'dataset_zara_products',
+          'dataset_etsy_products',
+          'dataset_bestbuy_products',
+          'dataset_linkedin_person_profile',
+          'dataset_linkedin_company_profile',
+          'dataset_linkedin_job_listings',
+          'dataset_linkedin_posts',
+          'dataset_linkedin_people_search',
+          'dataset_crunchbase_company',
+          'dataset_zoominfo_company_profile',
+          'dataset_instagram_profiles',
+          'dataset_instagram_posts',
+          'dataset_instagram_reels',
+          'dataset_instagram_comments',
+          'dataset_facebook_posts',
+          'dataset_facebook_marketplace_listings',
+          'dataset_facebook_company_reviews',
+          'dataset_facebook_events',
+          'dataset_tiktok_profiles',
+          'dataset_tiktok_posts',
+          'dataset_tiktok_shop',
+          'dataset_tiktok_comments',
+          'dataset_google_maps_reviews',
+          'dataset_google_shopping',
+          'dataset_google_play_store',
+          'dataset_apple_app_store',
+          'dataset_reuter_news',
+          'dataset_github_repository_file',
+          'dataset_yahoo_finance_business',
+          'dataset_x_posts',
+          'dataset_zillow_properties_listing',
+          'dataset_booking_hotel_listings',
+          'dataset_youtube_profiles',
+          'dataset_youtube_comments',
+          'dataset_reddit_posts',
+          'dataset_youtube_videos',
+        ],
+      },
+      required: true,
+    },
+    {
+      id: 'keyword',
+      title: 'Keyword',
+      type: 'short-input',
+      placeholder: 'Enter keyword',
+      condition: { field: 'operation', value: ['dataset_amazon_product_search'] },
+      required: true,
+    },
+    {
+      id: 'first_name',
+      title: 'First Name',
+      type: 'short-input',
+      placeholder: 'First name',
+      condition: { field: 'operation', value: ['dataset_linkedin_people_search'] },
+      required: true,
+    },
+    {
+      id: 'last_name',
+      title: 'Last Name',
+      type: 'short-input',
+      placeholder: 'Last name',
+      condition: { field: 'operation', value: ['dataset_linkedin_people_search'] },
+      required: true,
+    },
+    {
+      id: 'num_of_reviews',
+      title: 'Number of Reviews',
+      type: 'short-input',
+      placeholder: '10',
+      condition: { field: 'operation', value: ['dataset_facebook_company_reviews'] },
+      required: true,
+    },
+    {
+      id: 'days_limit',
+      title: 'Days Limit',
+      type: 'short-input',
+      placeholder: '3',
+      condition: { field: 'operation', value: ['dataset_google_maps_reviews'] },
+    },
+    {
+      id: 'num_of_comments',
+      title: 'Number of Comments',
+      type: 'short-input',
+      placeholder: '10',
+      condition: { field: 'operation', value: ['dataset_youtube_comments'] },
+    },
+    {
+      id: 'package_name',
+      title: 'Package Name',
+      type: 'short-input',
+      placeholder: '@brightdata/sdk',
+      condition: { field: 'operation', value: ['dataset_npm_package', 'dataset_pypi_package'] },
+      required: true,
+    },
+    {
+      id: 'apiToken',
+      title: 'API Token',
+      type: 'short-input',
+      placeholder: 'Your Bright Data API token',
+      password: true,
+      required: true,
+    },
+    {
+      id: 'unlockerZone',
+      title: 'Unlocker Zone',
+      type: 'short-input',
+      placeholder: 'mcp_unlocker',
+      mode: 'advanced',
+    },
+  ],
+
+  tools: {
+    access: [
+      'brightdata_scrape_markdown',
+      'brightdata_search_engine',
+      'brightdata_dataset_amazon_product',
+      'brightdata_dataset_amazon_product_reviews',
+      'brightdata_dataset_amazon_product_search',
+      'brightdata_dataset_walmart_product',
+      'brightdata_dataset_walmart_seller',
+      'brightdata_dataset_ebay_product',
+      'brightdata_dataset_homedepot_products',
+      'brightdata_dataset_zara_products',
+      'brightdata_dataset_etsy_products',
+      'brightdata_dataset_bestbuy_products',
+      'brightdata_dataset_linkedin_person_profile',
+      'brightdata_dataset_linkedin_company_profile',
+      'brightdata_dataset_linkedin_job_listings',
+      'brightdata_dataset_linkedin_posts',
+      'brightdata_dataset_linkedin_people_search',
+      'brightdata_dataset_crunchbase_company',
+      'brightdata_dataset_zoominfo_company_profile',
+      'brightdata_dataset_instagram_profiles',
+      'brightdata_dataset_instagram_posts',
+      'brightdata_dataset_instagram_reels',
+      'brightdata_dataset_instagram_comments',
+      'brightdata_dataset_facebook_posts',
+      'brightdata_dataset_facebook_marketplace_listings',
+      'brightdata_dataset_facebook_company_reviews',
+      'brightdata_dataset_facebook_events',
+      'brightdata_dataset_tiktok_profiles',
+      'brightdata_dataset_tiktok_posts',
+      'brightdata_dataset_tiktok_shop',
+      'brightdata_dataset_tiktok_comments',
+      'brightdata_dataset_google_maps_reviews',
+      'brightdata_dataset_google_shopping',
+      'brightdata_dataset_google_play_store',
+      'brightdata_dataset_apple_app_store',
+      'brightdata_dataset_reuter_news',
+      'brightdata_dataset_github_repository_file',
+      'brightdata_dataset_yahoo_finance_business',
+      'brightdata_dataset_x_posts',
+      'brightdata_dataset_zillow_properties_listing',
+      'brightdata_dataset_booking_hotel_listings',
+      'brightdata_dataset_youtube_profiles',
+      'brightdata_dataset_youtube_comments',
+      'brightdata_dataset_reddit_posts',
+      'brightdata_dataset_youtube_videos',
+      'brightdata_dataset_npm_package',
+      'brightdata_dataset_pypi_package',
+    ],
+    config: {
+      tool: (params: Record<string, unknown>) => {
+        // Dataset operations resolve through the map; the other two are handled below.
+        const datasetTool = DATASET_TOOL_MAP[String(params.operation)]
+        if (datasetTool) return datasetTool
+        switch (params.operation) {
+          case 'scrape_markdown':
+            return 'brightdata_scrape_markdown'
+          case 'search_engine':
+            return 'brightdata_search_engine'
+          default:
+            throw new Error('Invalid operation selected')
+        }
+      },
+    },
+  },
+
+  inputs: {
+    operation: { type: 'string', description: 'Operation to perform' },
+    url: { type: 'string', description: 'URL to scrape or dataset input' },
+    query: { type: 'string', description: 'Search query' },
+    maxResults: { type: 'number', description: 'Maximum search results' },
+    keyword: { type: 'string', description: 'Dataset keyword input' },
+    first_name: { type: 'string', description: 'Dataset first name input' },
+    last_name: { type: 'string', description: 'Dataset last name input' },
+    num_of_reviews: { type: 'string', description: 'Dataset number of reviews input' },
+    days_limit: { type: 'string', description: 'Dataset days limit input' },
+    num_of_comments: { type: 'string', description: 'Dataset number of comments input' },
+    package_name: { type: 'string', description: 'Dataset package name input' },
+    apiToken: { type: 'string', description: 'Bright Data API token' },
+    unlockerZone: { type: 'string', description: 'Unlocker zone name' },
+  },
+
+  outputs: {
+    markdown: { type: 'string', description: 'Scraped markdown content' },
+    results: { type: 'array', description: 'Search results' },
+    data: { type: 'json', description: 'Dataset response' },
+    url: { type: 'string', description: 'Current or scraped URL' },
+    title: { type: 'string', description: 'Page title' },
+    success: { type: 'boolean', description: 'Operation success status' },
+    snapshot_at: { type: 'string', description: 'Dataset snapshot timestamp' },
+  },
+}
diff --git a/apps/sim/blocks/registry.ts b/apps/sim/blocks/registry.ts index 06752d5d68..34642c341a 100644 --- a/apps/sim/blocks/registry.ts +++ b/apps/sim/blocks/registry.ts @@ -8,6 +8,7 @@ import { ApifyBlock } from '@/blocks/blocks/apify' import { ApolloBlock } from '@/blocks/blocks/apollo' import { ArxivBlock } from '@/blocks/blocks/arxiv' import { AsanaBlock } from '@/blocks/blocks/asana' +import { BrightDataBlock } from '@/blocks/blocks/brightdata' // import { BoxBlock } from '@/blocks/blocks/box' // TODO: Box OAuth integration import { BrowserUseBlock } from '@/blocks/blocks/browser_use' import { CalendlyBlock } from '@/blocks/blocks/calendly' @@ -160,6 +161,7 @@ export const registry: Record = { arxiv: ArxivBlock, asana: AsanaBlock, // box: BoxBlock, // TODO: Box OAuth integration + brightdata: BrightDataBlock, browser_use: BrowserUseBlock, calendly: CalendlyBlock, chat_trigger: ChatTriggerBlock, diff --git a/apps/sim/components/icons.tsx b/apps/sim/components/icons.tsx index 0143e517a5..9c825bc341 100644 --- a/apps/sim/components/icons.tsx +++ b/apps/sim/components/icons.tsx @@ -425,6 +425,67 @@ export function FirecrawlIcon(props: SVGProps) { ) } +export function BrightDataIcon(props: SVGProps) { + return ( + + + + + + + + + + + + + + ) +} + export function JinaAIIcon(props: SVGProps) { return ( = { + id: 'brightdata_dataset_amazon_product', + name: 'Bright Data Amazon Product Dataset', + description: + 'Quickly read structured amazon product data.\nRequires a valid product URL with /dp/ in it.\nThis can be a cache lookup, so it can be more reliable than scraping', + version: '1.0.0', + + params: { + url: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Dataset input URL', + }, + apiToken: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Bright Data API token', + }, + }, + + request: { + method: 'POST', + url: '/api/tools/brightdata/dataset', + headers: () => ({ + 'Content-Type': 
'application/json', + }), + body: (params) => { + const body: Record = { + datasetId: 'gd_l7q7dkf244hwjntr0', + apiToken: params.apiToken, + url: params.url, + } + + return body + }, + }, + + transformResponse: async (response: Response) => { + const data = await response.json() + + if (!response.ok) { + throw new Error(data.error || 'Bright Data dataset fetch failed') + } + + return { + success: true, + output: data, + } + }, + + outputs: { + data: { + type: 'object', + description: 'Structured dataset response', + }, + snapshot_at: { + type: 'string', + description: 'Timestamp of data snapshot', + optional: true, + }, + }, +} diff --git a/apps/sim/tools/brightdata/dataset_amazon_product_reviews.ts b/apps/sim/tools/brightdata/dataset_amazon_product_reviews.ts new file mode 100644 index 0000000000..fca9c6a631 --- /dev/null +++ b/apps/sim/tools/brightdata/dataset_amazon_product_reviews.ts @@ -0,0 +1,70 @@ +import type { DatasetParams, DatasetResponse } from '@/tools/brightdata/types' +import type { ToolConfig } from '@/tools/types' + +/** + * Bright Data Amazon Product Reviews dataset tool. 
+ *
+ * Sends the product URL and API token to the internal dataset route.
+ */
+export const datasetAmazonProductReviewsTool: ToolConfig<DatasetParams, DatasetResponse> = {
+  id: 'brightdata_dataset_amazon_product_reviews',
+  name: 'Bright Data Amazon Product Reviews Dataset',
+  description:
+    'Quickly read structured amazon product review data.\nRequires a valid product URL with /dp/ in it.\nThis can be a cache lookup, so it can be more reliable than scraping',
+  version: '1.0.0',
+
+  params: {
+    url: {
+      type: 'string',
+      required: true,
+      visibility: 'user-or-llm',
+      description: 'Dataset input URL',
+    },
+    apiToken: {
+      type: 'string',
+      required: true,
+      visibility: 'user-only',
+      description: 'Bright Data API token',
+    },
+  },
+
+  request: {
+    method: 'POST',
+    url: '/api/tools/brightdata/dataset',
+    headers: () => ({
+      'Content-Type': 'application/json',
+    }),
+    // `datasetId` selects the Bright Data dataset; the other fields are its input.
+    body: (params) => ({
+      datasetId: 'gd_le8e811kzy4ggddlq',
+      apiToken: params.apiToken,
+      url: params.url,
+    }),
+  },
+
+  transformResponse: async (response: Response) => {
+    const data = await response.json()
+
+    // The dataset route reports failures as `{ error }` with a non-2xx status.
+    if (!response.ok) {
+      throw new Error(data.error || 'Bright Data dataset fetch failed')
+    }
+
+    return {
+      success: true,
+      output: data,
+    }
+  },
+
+  outputs: {
+    data: {
+      type: 'object',
+      description: 'Structured dataset response',
+    },
+    snapshot_at: {
+      type: 'string',
+      description: 'Timestamp of data snapshot',
+      optional: true,
+    },
+  },
+}
diff --git a/apps/sim/tools/brightdata/dataset_amazon_product_search.ts b/apps/sim/tools/brightdata/dataset_amazon_product_search.ts
new file mode 100644
index 0000000000..b8b56a4d09
--- /dev/null
+++ b/apps/sim/tools/brightdata/dataset_amazon_product_search.ts
@@ -0,0 +1,79 @@
+import type { DatasetParams, DatasetResponse } from '@/tools/brightdata/types'
+import type { ToolConfig } from '@/tools/types'
+
+/**
+ * Bright Data Amazon Product Search dataset tool.
+ *
+ * Sends the keyword, domain URL, and API token to the internal dataset route.
+ */
+export const datasetAmazonProductSearchTool: ToolConfig<DatasetParams, DatasetResponse> = {
+  id: 'brightdata_dataset_amazon_product_search',
+  name: 'Bright Data Amazon Product Search Dataset',
+  description:
+    'Quickly read structured amazon product search data.\nRequires a valid search keyword and amazon domain URL.\nThis can be a cache lookup, so it can be more reliable than scraping',
+  version: '1.0.0',
+
+  params: {
+    keyword: {
+      type: 'string',
+      required: true,
+      visibility: 'user-or-llm',
+      description: 'Search keyword',
+    },
+    url: {
+      type: 'string',
+      required: true,
+      visibility: 'user-or-llm',
+      description: 'Dataset input URL',
+    },
+    apiToken: {
+      type: 'string',
+      required: true,
+      visibility: 'user-only',
+      description: 'Bright Data API token',
+    },
+  },
+
+  request: {
+    method: 'POST',
+    url: '/api/tools/brightdata/dataset',
+    headers: () => ({
+      'Content-Type': 'application/json',
+    }),
+    // `datasetId` selects the Bright Data dataset; the other fields are its input.
+    body: (params) => ({
+      datasetId: 'gd_lwdb4vjm1ehb499uxs',
+      apiToken: params.apiToken,
+      keyword: params.keyword,
+      url: params.url,
+      // Fixed value: a single results page is requested.
+      pages_to_search: '1',
+    }),
+  },
+
+  transformResponse: async (response: Response) => {
+    const data = await response.json()
+
+    // The dataset route reports failures as `{ error }` with a non-2xx status.
+    if (!response.ok) {
+      throw new Error(data.error || 'Bright Data dataset fetch failed')
+    }
+
+    return {
+      success: true,
+      output: data,
+    }
+  },
+
+  outputs: {
+    data: {
+      type: 'object',
+      description: 'Structured dataset response',
+    },
+    snapshot_at: {
+      type: 'string',
+      description: 'Timestamp of data snapshot',
+      optional: true,
+    },
+  },
+}
diff --git a/apps/sim/tools/brightdata/dataset_apple_app_store.ts b/apps/sim/tools/brightdata/dataset_apple_app_store.ts
new file mode 100644
index 0000000000..9319dc0737
--- /dev/null
+++ b/apps/sim/tools/brightdata/dataset_apple_app_store.ts
@@ -0,0 +1,70 @@
+import type { DatasetParams, DatasetResponse } from '@/tools/brightdata/types'
+import type { ToolConfig } from '@/tools/types'
+
+/**
+ * Bright Data Apple App Store 
dataset tool. + */ +export const datasetAppleAppStoreTool: ToolConfig = { + id: 'brightdata_dataset_apple_app_store', + name: 'Bright Data Apple App Store Dataset', + description: + 'Quickly read structured apple app store data.\nRequires a valid apple app store app URL.\nThis can be a cache lookup, so it can be more reliable than scraping', + version: '1.0.0', + + params: { + url: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Dataset input URL', + }, + apiToken: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Bright Data API token', + }, + }, + + request: { + method: 'POST', + url: '/api/tools/brightdata/dataset', + headers: () => ({ + 'Content-Type': 'application/json', + }), + body: (params) => { + const body: Record = { + datasetId: 'gd_lsk9ki3u2iishmwrui', + apiToken: params.apiToken, + url: params.url, + } + + return body + }, + }, + + transformResponse: async (response: Response) => { + const data = await response.json() + + if (!response.ok) { + throw new Error(data.error || 'Bright Data dataset fetch failed') + } + + return { + success: true, + output: data, + } + }, + + outputs: { + data: { + type: 'object', + description: 'Structured dataset response', + }, + snapshot_at: { + type: 'string', + description: 'Timestamp of data snapshot', + optional: true, + }, + }, +} diff --git a/apps/sim/tools/brightdata/dataset_bestbuy_products.ts b/apps/sim/tools/brightdata/dataset_bestbuy_products.ts new file mode 100644 index 0000000000..8008d9828a --- /dev/null +++ b/apps/sim/tools/brightdata/dataset_bestbuy_products.ts @@ -0,0 +1,70 @@ +import type { DatasetParams, DatasetResponse } from '@/tools/brightdata/types' +import type { ToolConfig } from '@/tools/types' + +/** + * Bright Data Bestbuy Products dataset tool. 
+ */ +export const datasetBestbuyProductsTool: ToolConfig = { + id: 'brightdata_dataset_bestbuy_products', + name: 'Bright Data Bestbuy Products Dataset', + description: + 'Quickly read structured bestbuy product data.\nRequires a valid bestbuy product URL.\nThis can be a cache lookup, so it can be more reliable than scraping', + version: '1.0.0', + + params: { + url: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Dataset input URL', + }, + apiToken: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Bright Data API token', + }, + }, + + request: { + method: 'POST', + url: '/api/tools/brightdata/dataset', + headers: () => ({ + 'Content-Type': 'application/json', + }), + body: (params) => { + const body: Record = { + datasetId: 'gd_ltre1jqe1jfr7cccf', + apiToken: params.apiToken, + url: params.url, + } + + return body + }, + }, + + transformResponse: async (response: Response) => { + const data = await response.json() + + if (!response.ok) { + throw new Error(data.error || 'Bright Data dataset fetch failed') + } + + return { + success: true, + output: data, + } + }, + + outputs: { + data: { + type: 'object', + description: 'Structured dataset response', + }, + snapshot_at: { + type: 'string', + description: 'Timestamp of data snapshot', + optional: true, + }, + }, +} diff --git a/apps/sim/tools/brightdata/dataset_booking_hotel_listings.ts b/apps/sim/tools/brightdata/dataset_booking_hotel_listings.ts new file mode 100644 index 0000000000..5dd6bb4fb9 --- /dev/null +++ b/apps/sim/tools/brightdata/dataset_booking_hotel_listings.ts @@ -0,0 +1,70 @@ +import type { DatasetParams, DatasetResponse } from '@/tools/brightdata/types' +import type { ToolConfig } from '@/tools/types' + +/** + * Bright Data Booking Hotel Listings dataset tool. 
+ */ +export const datasetBookingHotelListingsTool: ToolConfig = { + id: 'brightdata_dataset_booking_hotel_listings', + name: 'Bright Data Booking Hotel Listings Dataset', + description: + 'Quickly read structured booking hotel listings data.\nRequires a valid booking hotel listing URL.\nThis can be a cache lookup, so it can be more reliable than scraping', + version: '1.0.0', + + params: { + url: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Dataset input URL', + }, + apiToken: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Bright Data API token', + }, + }, + + request: { + method: 'POST', + url: '/api/tools/brightdata/dataset', + headers: () => ({ + 'Content-Type': 'application/json', + }), + body: (params) => { + const body: Record = { + datasetId: 'gd_m5mbdl081229ln6t4a', + apiToken: params.apiToken, + url: params.url, + } + + return body + }, + }, + + transformResponse: async (response: Response) => { + const data = await response.json() + + if (!response.ok) { + throw new Error(data.error || 'Bright Data dataset fetch failed') + } + + return { + success: true, + output: data, + } + }, + + outputs: { + data: { + type: 'object', + description: 'Structured dataset response', + }, + snapshot_at: { + type: 'string', + description: 'Timestamp of data snapshot', + optional: true, + }, + }, +} diff --git a/apps/sim/tools/brightdata/dataset_crunchbase_company.ts b/apps/sim/tools/brightdata/dataset_crunchbase_company.ts new file mode 100644 index 0000000000..f5494defdb --- /dev/null +++ b/apps/sim/tools/brightdata/dataset_crunchbase_company.ts @@ -0,0 +1,70 @@ +import type { DatasetParams, DatasetResponse } from '@/tools/brightdata/types' +import type { ToolConfig } from '@/tools/types' + +/** + * Bright Data Crunchbase Company dataset tool. 
+ */ +export const datasetCrunchbaseCompanyTool: ToolConfig = { + id: 'brightdata_dataset_crunchbase_company', + name: 'Bright Data Crunchbase Company Dataset', + description: + 'Quickly read structured crunchbase company data\nThis can be a cache lookup, so it can be more reliable than scraping', + version: '1.0.0', + + params: { + url: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Dataset input URL', + }, + apiToken: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Bright Data API token', + }, + }, + + request: { + method: 'POST', + url: '/api/tools/brightdata/dataset', + headers: () => ({ + 'Content-Type': 'application/json', + }), + body: (params) => { + const body: Record = { + datasetId: 'gd_l1vijqt9jfj7olije', + apiToken: params.apiToken, + url: params.url, + } + + return body + }, + }, + + transformResponse: async (response: Response) => { + const data = await response.json() + + if (!response.ok) { + throw new Error(data.error || 'Bright Data dataset fetch failed') + } + + return { + success: true, + output: data, + } + }, + + outputs: { + data: { + type: 'object', + description: 'Structured dataset response', + }, + snapshot_at: { + type: 'string', + description: 'Timestamp of data snapshot', + optional: true, + }, + }, +} diff --git a/apps/sim/tools/brightdata/dataset_ebay_product.ts b/apps/sim/tools/brightdata/dataset_ebay_product.ts new file mode 100644 index 0000000000..9031c4f92b --- /dev/null +++ b/apps/sim/tools/brightdata/dataset_ebay_product.ts @@ -0,0 +1,70 @@ +import type { DatasetParams, DatasetResponse } from '@/tools/brightdata/types' +import type { ToolConfig } from '@/tools/types' + +/** + * Bright Data Ebay Product dataset tool. 
+ */ +export const datasetEbayProductTool: ToolConfig = { + id: 'brightdata_dataset_ebay_product', + name: 'Bright Data Ebay Product Dataset', + description: + 'Quickly read structured ebay product data.\nRequires a valid ebay product URL.\nThis can be a cache lookup, so it can be more reliable than scraping', + version: '1.0.0', + + params: { + url: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Dataset input URL', + }, + apiToken: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Bright Data API token', + }, + }, + + request: { + method: 'POST', + url: '/api/tools/brightdata/dataset', + headers: () => ({ + 'Content-Type': 'application/json', + }), + body: (params) => { + const body: Record = { + datasetId: 'gd_ltr9mjt81n0zzdk1fb', + apiToken: params.apiToken, + url: params.url, + } + + return body + }, + }, + + transformResponse: async (response: Response) => { + const data = await response.json() + + if (!response.ok) { + throw new Error(data.error || 'Bright Data dataset fetch failed') + } + + return { + success: true, + output: data, + } + }, + + outputs: { + data: { + type: 'object', + description: 'Structured dataset response', + }, + snapshot_at: { + type: 'string', + description: 'Timestamp of data snapshot', + optional: true, + }, + }, +} diff --git a/apps/sim/tools/brightdata/dataset_etsy_products.ts b/apps/sim/tools/brightdata/dataset_etsy_products.ts new file mode 100644 index 0000000000..6dbfadcb1b --- /dev/null +++ b/apps/sim/tools/brightdata/dataset_etsy_products.ts @@ -0,0 +1,70 @@ +import type { DatasetParams, DatasetResponse } from '@/tools/brightdata/types' +import type { ToolConfig } from '@/tools/types' + +/** + * Bright Data Etsy Products dataset tool. 
+ */ +export const datasetEtsyProductsTool: ToolConfig = { + id: 'brightdata_dataset_etsy_products', + name: 'Bright Data Etsy Products Dataset', + description: + 'Quickly read structured etsy product data.\nRequires a valid etsy product URL.\nThis can be a cache lookup, so it can be more reliable than scraping', + version: '1.0.0', + + params: { + url: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Dataset input URL', + }, + apiToken: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Bright Data API token', + }, + }, + + request: { + method: 'POST', + url: '/api/tools/brightdata/dataset', + headers: () => ({ + 'Content-Type': 'application/json', + }), + body: (params) => { + const body: Record = { + datasetId: 'gd_ltppk0jdv1jqz25mz', + apiToken: params.apiToken, + url: params.url, + } + + return body + }, + }, + + transformResponse: async (response: Response) => { + const data = await response.json() + + if (!response.ok) { + throw new Error(data.error || 'Bright Data dataset fetch failed') + } + + return { + success: true, + output: data, + } + }, + + outputs: { + data: { + type: 'object', + description: 'Structured dataset response', + }, + snapshot_at: { + type: 'string', + description: 'Timestamp of data snapshot', + optional: true, + }, + }, +} diff --git a/apps/sim/tools/brightdata/dataset_facebook_company_reviews.ts b/apps/sim/tools/brightdata/dataset_facebook_company_reviews.ts new file mode 100644 index 0000000000..bb463d414f --- /dev/null +++ b/apps/sim/tools/brightdata/dataset_facebook_company_reviews.ts @@ -0,0 +1,77 @@ +import type { DatasetParams, DatasetResponse } from '@/tools/brightdata/types' +import type { ToolConfig } from '@/tools/types' + +/** + * Bright Data Facebook Company Reviews dataset tool. 
+ */ +export const datasetFacebookCompanyReviewsTool: ToolConfig = { + id: 'brightdata_dataset_facebook_company_reviews', + name: 'Bright Data Facebook Company Reviews Dataset', + description: + 'Quickly read structured Facebook company reviews data.\nRequires a valid Facebook company URL and number of reviews.\nThis can be a cache lookup, so it can be more reliable than scraping', + version: '1.0.0', + + params: { + url: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Dataset input URL', + }, + num_of_reviews: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Number of reviews to fetch', + }, + apiToken: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Bright Data API token', + }, + }, + + request: { + method: 'POST', + url: '/api/tools/brightdata/dataset', + headers: () => ({ + 'Content-Type': 'application/json', + }), + body: (params) => { + const body: Record = { + datasetId: 'gd_m0dtqpiu1mbcyc2g86', + apiToken: params.apiToken, + url: params.url, + num_of_reviews: params.num_of_reviews, + } + + return body + }, + }, + + transformResponse: async (response: Response) => { + const data = await response.json() + + if (!response.ok) { + throw new Error(data.error || 'Bright Data dataset fetch failed') + } + + return { + success: true, + output: data, + } + }, + + outputs: { + data: { + type: 'object', + description: 'Structured dataset response', + }, + snapshot_at: { + type: 'string', + description: 'Timestamp of data snapshot', + optional: true, + }, + }, +} diff --git a/apps/sim/tools/brightdata/dataset_facebook_events.ts b/apps/sim/tools/brightdata/dataset_facebook_events.ts new file mode 100644 index 0000000000..b6fa6fa7a2 --- /dev/null +++ b/apps/sim/tools/brightdata/dataset_facebook_events.ts @@ -0,0 +1,70 @@ +import type { DatasetParams, DatasetResponse } from '@/tools/brightdata/types' +import type { ToolConfig } from '@/tools/types' + +/** + * Bright Data 
Facebook Events dataset tool. + */ +export const datasetFacebookEventsTool: ToolConfig = { + id: 'brightdata_dataset_facebook_events', + name: 'Bright Data Facebook Events Dataset', + description: + 'Quickly read structured Facebook events data.\nRequires a valid Facebook event URL.\nThis can be a cache lookup, so it can be more reliable than scraping', + version: '1.0.0', + + params: { + url: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Dataset input URL', + }, + apiToken: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Bright Data API token', + }, + }, + + request: { + method: 'POST', + url: '/api/tools/brightdata/dataset', + headers: () => ({ + 'Content-Type': 'application/json', + }), + body: (params) => { + const body: Record = { + datasetId: 'gd_m14sd0to1jz48ppm51', + apiToken: params.apiToken, + url: params.url, + } + + return body + }, + }, + + transformResponse: async (response: Response) => { + const data = await response.json() + + if (!response.ok) { + throw new Error(data.error || 'Bright Data dataset fetch failed') + } + + return { + success: true, + output: data, + } + }, + + outputs: { + data: { + type: 'object', + description: 'Structured dataset response', + }, + snapshot_at: { + type: 'string', + description: 'Timestamp of data snapshot', + optional: true, + }, + }, +} diff --git a/apps/sim/tools/brightdata/dataset_facebook_marketplace_listings.ts b/apps/sim/tools/brightdata/dataset_facebook_marketplace_listings.ts new file mode 100644 index 0000000000..d680e37334 --- /dev/null +++ b/apps/sim/tools/brightdata/dataset_facebook_marketplace_listings.ts @@ -0,0 +1,70 @@ +import type { DatasetParams, DatasetResponse } from '@/tools/brightdata/types' +import type { ToolConfig } from '@/tools/types' + +/** + * Bright Data Facebook Marketplace Listings dataset tool. 
+ */ +export const datasetFacebookMarketplaceListingsTool: ToolConfig = { + id: 'brightdata_dataset_facebook_marketplace_listings', + name: 'Bright Data Facebook Marketplace Listings Dataset', + description: + 'Quickly read structured Facebook marketplace listing data.\nRequires a valid Facebook marketplace listing URL.\nThis can be a cache lookup, so it can be more reliable than scraping', + version: '1.0.0', + + params: { + url: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Dataset input URL', + }, + apiToken: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Bright Data API token', + }, + }, + + request: { + method: 'POST', + url: '/api/tools/brightdata/dataset', + headers: () => ({ + 'Content-Type': 'application/json', + }), + body: (params) => { + const body: Record = { + datasetId: 'gd_lvt9iwuh6fbcwmx1a', + apiToken: params.apiToken, + url: params.url, + } + + return body + }, + }, + + transformResponse: async (response: Response) => { + const data = await response.json() + + if (!response.ok) { + throw new Error(data.error || 'Bright Data dataset fetch failed') + } + + return { + success: true, + output: data, + } + }, + + outputs: { + data: { + type: 'object', + description: 'Structured dataset response', + }, + snapshot_at: { + type: 'string', + description: 'Timestamp of data snapshot', + optional: true, + }, + }, +} diff --git a/apps/sim/tools/brightdata/dataset_facebook_posts.ts b/apps/sim/tools/brightdata/dataset_facebook_posts.ts new file mode 100644 index 0000000000..8dc8fcadb7 --- /dev/null +++ b/apps/sim/tools/brightdata/dataset_facebook_posts.ts @@ -0,0 +1,70 @@ +import type { DatasetParams, DatasetResponse } from '@/tools/brightdata/types' +import type { ToolConfig } from '@/tools/types' + +/** + * Bright Data Facebook Posts dataset tool. 
+ */ +export const datasetFacebookPostsTool: ToolConfig = { + id: 'brightdata_dataset_facebook_posts', + name: 'Bright Data Facebook Posts Dataset', + description: + 'Quickly read structured Facebook post data.\nRequires a valid Facebook post URL.\nThis can be a cache lookup, so it can be more reliable than scraping', + version: '1.0.0', + + params: { + url: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Dataset input URL', + }, + apiToken: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Bright Data API token', + }, + }, + + request: { + method: 'POST', + url: '/api/tools/brightdata/dataset', + headers: () => ({ + 'Content-Type': 'application/json', + }), + body: (params) => { + const body: Record = { + datasetId: 'gd_lyclm1571iy3mv57zw', + apiToken: params.apiToken, + url: params.url, + } + + return body + }, + }, + + transformResponse: async (response: Response) => { + const data = await response.json() + + if (!response.ok) { + throw new Error(data.error || 'Bright Data dataset fetch failed') + } + + return { + success: true, + output: data, + } + }, + + outputs: { + data: { + type: 'object', + description: 'Structured dataset response', + }, + snapshot_at: { + type: 'string', + description: 'Timestamp of data snapshot', + optional: true, + }, + }, +} diff --git a/apps/sim/tools/brightdata/dataset_github_repository_file.ts b/apps/sim/tools/brightdata/dataset_github_repository_file.ts new file mode 100644 index 0000000000..6e8d86fff1 --- /dev/null +++ b/apps/sim/tools/brightdata/dataset_github_repository_file.ts @@ -0,0 +1,70 @@ +import type { DatasetParams, DatasetResponse } from '@/tools/brightdata/types' +import type { ToolConfig } from '@/tools/types' + +/** + * Bright Data Github Repository File dataset tool. 
+ */ +export const datasetGithubRepositoryFileTool: ToolConfig = { + id: 'brightdata_dataset_github_repository_file', + name: 'Bright Data Github Repository File Dataset', + description: + 'Quickly read structured github repository data.\nRequires a valid github repository file URL.\nThis can be a cache lookup, so it can be more reliable than scraping', + version: '1.0.0', + + params: { + url: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Dataset input URL', + }, + apiToken: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Bright Data API token', + }, + }, + + request: { + method: 'POST', + url: '/api/tools/brightdata/dataset', + headers: () => ({ + 'Content-Type': 'application/json', + }), + body: (params) => { + const body: Record = { + datasetId: 'gd_lyrexgxc24b3d4imjt', + apiToken: params.apiToken, + url: params.url, + } + + return body + }, + }, + + transformResponse: async (response: Response) => { + const data = await response.json() + + if (!response.ok) { + throw new Error(data.error || 'Bright Data dataset fetch failed') + } + + return { + success: true, + output: data, + } + }, + + outputs: { + data: { + type: 'object', + description: 'Structured dataset response', + }, + snapshot_at: { + type: 'string', + description: 'Timestamp of data snapshot', + optional: true, + }, + }, +} diff --git a/apps/sim/tools/brightdata/dataset_google_maps_reviews.ts b/apps/sim/tools/brightdata/dataset_google_maps_reviews.ts new file mode 100644 index 0000000000..84b9a10b1f --- /dev/null +++ b/apps/sim/tools/brightdata/dataset_google_maps_reviews.ts @@ -0,0 +1,81 @@ +import type { DatasetParams, DatasetResponse } from '@/tools/brightdata/types' +import type { ToolConfig } from '@/tools/types' + +/** + * Bright Data Google Maps Reviews dataset tool. 
+ */ +export const datasetGoogleMapsReviewsTool: ToolConfig = { + id: 'brightdata_dataset_google_maps_reviews', + name: 'Bright Data Google Maps Reviews Dataset', + description: + 'Quickly read structured Google maps reviews data.\nRequires a valid Google maps URL.\nThis can be a cache lookup, so it can be more reliable than scraping', + version: '1.0.0', + + params: { + url: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Dataset input URL', + }, + days_limit: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Days limit (default: 3)', + }, + apiToken: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Bright Data API token', + }, + }, + + request: { + method: 'POST', + url: '/api/tools/brightdata/dataset', + headers: () => ({ + 'Content-Type': 'application/json', + }), + body: (params) => { + const body: Record = { + datasetId: 'gd_luzfs1dn2oa0teb81', + apiToken: params.apiToken, + url: params.url, + days_limit: params.days_limit, + } + + if (body.days_limit === undefined) { + body.days_limit = '3' + } + + return body + }, + }, + + transformResponse: async (response: Response) => { + const data = await response.json() + + if (!response.ok) { + throw new Error(data.error || 'Bright Data dataset fetch failed') + } + + return { + success: true, + output: data, + } + }, + + outputs: { + data: { + type: 'object', + description: 'Structured dataset response', + }, + snapshot_at: { + type: 'string', + description: 'Timestamp of data snapshot', + optional: true, + }, + }, +} diff --git a/apps/sim/tools/brightdata/dataset_google_play_store.ts b/apps/sim/tools/brightdata/dataset_google_play_store.ts new file mode 100644 index 0000000000..c86600a60d --- /dev/null +++ b/apps/sim/tools/brightdata/dataset_google_play_store.ts @@ -0,0 +1,70 @@ +import type { DatasetParams, DatasetResponse } from '@/tools/brightdata/types' +import type { ToolConfig } from '@/tools/types' + +/** + * 
Bright Data Google Play Store dataset tool. + */ +export const datasetGooglePlayStoreTool: ToolConfig = { + id: 'brightdata_dataset_google_play_store', + name: 'Bright Data Google Play Store Dataset', + description: + 'Quickly read structured Google play store data.\nRequires a valid Google play store app URL.\nThis can be a cache lookup, so it can be more reliable than scraping', + version: '1.0.0', + + params: { + url: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Dataset input URL', + }, + apiToken: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Bright Data API token', + }, + }, + + request: { + method: 'POST', + url: '/api/tools/brightdata/dataset', + headers: () => ({ + 'Content-Type': 'application/json', + }), + body: (params) => { + const body: Record = { + datasetId: 'gd_lsk382l8xei8vzm4u', + apiToken: params.apiToken, + url: params.url, + } + + return body + }, + }, + + transformResponse: async (response: Response) => { + const data = await response.json() + + if (!response.ok) { + throw new Error(data.error || 'Bright Data dataset fetch failed') + } + + return { + success: true, + output: data, + } + }, + + outputs: { + data: { + type: 'object', + description: 'Structured dataset response', + }, + snapshot_at: { + type: 'string', + description: 'Timestamp of data snapshot', + optional: true, + }, + }, +} diff --git a/apps/sim/tools/brightdata/dataset_google_shopping.ts b/apps/sim/tools/brightdata/dataset_google_shopping.ts new file mode 100644 index 0000000000..c0c4a11c9d --- /dev/null +++ b/apps/sim/tools/brightdata/dataset_google_shopping.ts @@ -0,0 +1,70 @@ +import type { DatasetParams, DatasetResponse } from '@/tools/brightdata/types' +import type { ToolConfig } from '@/tools/types' + +/** + * Bright Data Google Shopping dataset tool. 
+ */ +export const datasetGoogleShoppingTool: ToolConfig = { + id: 'brightdata_dataset_google_shopping', + name: 'Bright Data Google Shopping Dataset', + description: + 'Quickly read structured Google shopping data.\nRequires a valid Google shopping product URL.\nThis can be a cache lookup, so it can be more reliable than scraping', + version: '1.0.0', + + params: { + url: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Dataset input URL', + }, + apiToken: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Bright Data API token', + }, + }, + + request: { + method: 'POST', + url: '/api/tools/brightdata/dataset', + headers: () => ({ + 'Content-Type': 'application/json', + }), + body: (params) => { + const body: Record = { + datasetId: 'gd_ltppk50q18kdw67omz', + apiToken: params.apiToken, + url: params.url, + } + + return body + }, + }, + + transformResponse: async (response: Response) => { + const data = await response.json() + + if (!response.ok) { + throw new Error(data.error || 'Bright Data dataset fetch failed') + } + + return { + success: true, + output: data, + } + }, + + outputs: { + data: { + type: 'object', + description: 'Structured dataset response', + }, + snapshot_at: { + type: 'string', + description: 'Timestamp of data snapshot', + optional: true, + }, + }, +} diff --git a/apps/sim/tools/brightdata/dataset_homedepot_products.ts b/apps/sim/tools/brightdata/dataset_homedepot_products.ts new file mode 100644 index 0000000000..cc94739ee1 --- /dev/null +++ b/apps/sim/tools/brightdata/dataset_homedepot_products.ts @@ -0,0 +1,70 @@ +import type { DatasetParams, DatasetResponse } from '@/tools/brightdata/types' +import type { ToolConfig } from '@/tools/types' + +/** + * Bright Data Homedepot Products dataset tool. 
+ */ +export const datasetHomedepotProductsTool: ToolConfig = { + id: 'brightdata_dataset_homedepot_products', + name: 'Bright Data Homedepot Products Dataset', + description: + 'Quickly read structured homedepot product data.\nRequires a valid homedepot product URL.\nThis can be a cache lookup, so it can be more reliable than scraping', + version: '1.0.0', + + params: { + url: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Dataset input URL', + }, + apiToken: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Bright Data API token', + }, + }, + + request: { + method: 'POST', + url: '/api/tools/brightdata/dataset', + headers: () => ({ + 'Content-Type': 'application/json', + }), + body: (params) => { + const body: Record = { + datasetId: 'gd_lmusivh019i7g97q2n', + apiToken: params.apiToken, + url: params.url, + } + + return body + }, + }, + + transformResponse: async (response: Response) => { + const data = await response.json() + + if (!response.ok) { + throw new Error(data.error || 'Bright Data dataset fetch failed') + } + + return { + success: true, + output: data, + } + }, + + outputs: { + data: { + type: 'object', + description: 'Structured dataset response', + }, + snapshot_at: { + type: 'string', + description: 'Timestamp of data snapshot', + optional: true, + }, + }, +} diff --git a/apps/sim/tools/brightdata/dataset_instagram_comments.ts b/apps/sim/tools/brightdata/dataset_instagram_comments.ts new file mode 100644 index 0000000000..4944c65f20 --- /dev/null +++ b/apps/sim/tools/brightdata/dataset_instagram_comments.ts @@ -0,0 +1,70 @@ +import type { DatasetParams, DatasetResponse } from '@/tools/brightdata/types' +import type { ToolConfig } from '@/tools/types' + +/** + * Bright Data Instagram Comments dataset tool. 
+ */ +export const datasetInstagramCommentsTool: ToolConfig = { + id: 'brightdata_dataset_instagram_comments', + name: 'Bright Data Instagram Comments Dataset', + description: + 'Quickly read structured Instagram comments data.\nRequires a valid Instagram URL.\nThis can be a cache lookup, so it can be more reliable than scraping', + version: '1.0.0', + + params: { + url: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Dataset input URL', + }, + apiToken: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Bright Data API token', + }, + }, + + request: { + method: 'POST', + url: '/api/tools/brightdata/dataset', + headers: () => ({ + 'Content-Type': 'application/json', + }), + body: (params) => { + const body: Record = { + datasetId: 'gd_ltppn085pokosxh13', + apiToken: params.apiToken, + url: params.url, + } + + return body + }, + }, + + transformResponse: async (response: Response) => { + const data = await response.json() + + if (!response.ok) { + throw new Error(data.error || 'Bright Data dataset fetch failed') + } + + return { + success: true, + output: data, + } + }, + + outputs: { + data: { + type: 'object', + description: 'Structured dataset response', + }, + snapshot_at: { + type: 'string', + description: 'Timestamp of data snapshot', + optional: true, + }, + }, +} diff --git a/apps/sim/tools/brightdata/dataset_instagram_posts.ts b/apps/sim/tools/brightdata/dataset_instagram_posts.ts new file mode 100644 index 0000000000..23006fcf4f --- /dev/null +++ b/apps/sim/tools/brightdata/dataset_instagram_posts.ts @@ -0,0 +1,70 @@ +import type { DatasetParams, DatasetResponse } from '@/tools/brightdata/types' +import type { ToolConfig } from '@/tools/types' + +/** + * Bright Data Instagram Posts dataset tool. 
+ */ +export const datasetInstagramPostsTool: ToolConfig = { + id: 'brightdata_dataset_instagram_posts', + name: 'Bright Data Instagram Posts Dataset', + description: + 'Quickly read structured Instagram post data.\nRequires a valid Instagram URL.\nThis can be a cache lookup, so it can be more reliable than scraping', + version: '1.0.0', + + params: { + url: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Dataset input URL', + }, + apiToken: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Bright Data API token', + }, + }, + + request: { + method: 'POST', + url: '/api/tools/brightdata/dataset', + headers: () => ({ + 'Content-Type': 'application/json', + }), + body: (params) => { + const body: Record = { + datasetId: 'gd_lk5ns7kz21pck8jpis', + apiToken: params.apiToken, + url: params.url, + } + + return body + }, + }, + + transformResponse: async (response: Response) => { + const data = await response.json() + + if (!response.ok) { + throw new Error(data.error || 'Bright Data dataset fetch failed') + } + + return { + success: true, + output: data, + } + }, + + outputs: { + data: { + type: 'object', + description: 'Structured dataset response', + }, + snapshot_at: { + type: 'string', + description: 'Timestamp of data snapshot', + optional: true, + }, + }, +} diff --git a/apps/sim/tools/brightdata/dataset_instagram_profiles.ts b/apps/sim/tools/brightdata/dataset_instagram_profiles.ts new file mode 100644 index 0000000000..af246beea0 --- /dev/null +++ b/apps/sim/tools/brightdata/dataset_instagram_profiles.ts @@ -0,0 +1,70 @@ +import type { DatasetParams, DatasetResponse } from '@/tools/brightdata/types' +import type { ToolConfig } from '@/tools/types' + +/** + * Bright Data Instagram Profiles dataset tool. 
+ */ +export const datasetInstagramProfilesTool: ToolConfig = { + id: 'brightdata_dataset_instagram_profiles', + name: 'Bright Data Instagram Profiles Dataset', + description: + 'Quickly read structured Instagram profile data.\nRequires a valid Instagram URL.\nThis can be a cache lookup, so it can be more reliable than scraping', + version: '1.0.0', + + params: { + url: { + type: 'string', + required: true, + visibility: 'user-or-llm', + description: 'Dataset input URL', + }, + apiToken: { + type: 'string', + required: true, + visibility: 'user-only', + description: 'Bright Data API token', + }, + }, + + request: { + method: 'POST', + url: '/api/tools/brightdata/dataset', + headers: () => ({ + 'Content-Type': 'application/json', + }), + body: (params) => { + const body: Record = { + datasetId: 'gd_l1vikfch901nx3by4', + apiToken: params.apiToken, + url: params.url, + } + + return body + }, + }, + + transformResponse: async (response: Response) => { + const data = await response.json() + + if (!response.ok) { + throw new Error(data.error || 'Bright Data dataset fetch failed') + } + + return { + success: true, + output: data, + } + }, + + outputs: { + data: { + type: 'object', + description: 'Structured dataset response', + }, + snapshot_at: { + type: 'string', + description: 'Timestamp of data snapshot', + optional: true, + }, + }, +} diff --git a/apps/sim/tools/brightdata/dataset_instagram_reels.ts b/apps/sim/tools/brightdata/dataset_instagram_reels.ts new file mode 100644 index 0000000000..9992453a45 --- /dev/null +++ b/apps/sim/tools/brightdata/dataset_instagram_reels.ts @@ -0,0 +1,70 @@ +import type { DatasetParams, DatasetResponse } from '@/tools/brightdata/types' +import type { ToolConfig } from '@/tools/types' + +/** + * Bright Data Instagram Reels dataset tool. 
/**
 * Bright Data Instagram Reels dataset tool.
 *
 * Posts the input URL and the caller's API token, together with this tool's
 * fixed dataset ID, to the `/api/tools/brightdata/dataset` route and returns
 * the parsed JSON reply as the tool output.
 */
export const datasetInstagramReelsTool: ToolConfig<DatasetParams, DatasetResponse> = {
  id: 'brightdata_dataset_instagram_reels',
  name: 'Bright Data Instagram Reels Dataset',
  description:
    'Quickly read structured Instagram reel data.\nRequires a valid Instagram URL.\nThis can be a cache lookup, so it can be more reliable than scraping',
  version: '1.0.0',

  params: {
    url: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'Dataset input URL',
    },
    apiToken: {
      type: 'string',
      required: true,
      visibility: 'user-only',
      description: 'Bright Data API token',
    },
  },

  request: {
    method: 'POST',
    url: '/api/tools/brightdata/dataset',
    headers: () => ({
      'Content-Type': 'application/json',
    }),
    // The route reads `datasetId` and `apiToken` itself and forwards every
    // other field as the dataset input record.
    body: (params: DatasetParams): Record<string, unknown> => ({
      datasetId: 'gd_lyclm20il4r5helnj',
      apiToken: params.apiToken,
      url: params.url,
    }),
  },

  /**
   * Unwraps the route reply.
   *
   * @param response - Reply from the dataset route.
   * @returns `{ success: true, output }` where `output` is the parsed JSON body.
   * @throws Error with the reply's `error` field, or a generic message when the
   * failure body is missing that field or is not JSON.
   */
  transformResponse: async (response: Response) => {
    if (!response.ok) {
      // Check the status before trusting the body: a failure reply is not
      // guaranteed to be JSON, so a parse error falls back to the generic message.
      const failure = await response.json().catch(() => null)
      throw new Error(failure?.error || 'Bright Data dataset fetch failed')
    }

    return {
      success: true,
      output: await response.json(),
    }
  },

  outputs: {
    data: {
      type: 'object',
      description: 'Structured dataset response',
    },
    snapshot_at: {
      type: 'string',
      description: 'Timestamp of data snapshot',
      optional: true,
    },
  },
}
/**
 * Bright Data Linkedin Company Profile dataset tool.
 *
 * Posts the input URL and the caller's API token, together with this tool's
 * fixed dataset ID, to the `/api/tools/brightdata/dataset` route and returns
 * the parsed JSON reply as the tool output.
 */
export const datasetLinkedinCompanyProfileTool: ToolConfig<DatasetParams, DatasetResponse> = {
  id: 'brightdata_dataset_linkedin_company_profile',
  name: 'Bright Data Linkedin Company Profile Dataset',
  description:
    'Quickly read structured linkedin company profile data\nThis can be a cache lookup, so it can be more reliable than scraping',
  version: '1.0.0',

  params: {
    url: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'Dataset input URL',
    },
    apiToken: {
      type: 'string',
      required: true,
      visibility: 'user-only',
      description: 'Bright Data API token',
    },
  },

  request: {
    method: 'POST',
    url: '/api/tools/brightdata/dataset',
    headers: () => ({
      'Content-Type': 'application/json',
    }),
    // The route reads `datasetId` and `apiToken` itself and forwards every
    // other field as the dataset input record.
    body: (params: DatasetParams): Record<string, unknown> => ({
      datasetId: 'gd_l1vikfnt1wgvvqz95w',
      apiToken: params.apiToken,
      url: params.url,
    }),
  },

  /**
   * Unwraps the route reply.
   *
   * @param response - Reply from the dataset route.
   * @returns `{ success: true, output }` where `output` is the parsed JSON body.
   * @throws Error with the reply's `error` field, or a generic message when the
   * failure body is missing that field or is not JSON.
   */
  transformResponse: async (response: Response) => {
    if (!response.ok) {
      // Check the status before trusting the body: a failure reply is not
      // guaranteed to be JSON, so a parse error falls back to the generic message.
      const failure = await response.json().catch(() => null)
      throw new Error(failure?.error || 'Bright Data dataset fetch failed')
    }

    return {
      success: true,
      output: await response.json(),
    }
  },

  outputs: {
    data: {
      type: 'object',
      description: 'Structured dataset response',
    },
    snapshot_at: {
      type: 'string',
      description: 'Timestamp of data snapshot',
      optional: true,
    },
  },
}
/**
 * Bright Data Linkedin Job Listings dataset tool.
 *
 * Posts the input URL and the caller's API token, together with this tool's
 * fixed dataset ID, to the `/api/tools/brightdata/dataset` route and returns
 * the parsed JSON reply as the tool output.
 */
export const datasetLinkedinJobListingsTool: ToolConfig<DatasetParams, DatasetResponse> = {
  id: 'brightdata_dataset_linkedin_job_listings',
  name: 'Bright Data Linkedin Job Listings Dataset',
  description:
    'Quickly read structured linkedin job listings data\nThis can be a cache lookup, so it can be more reliable than scraping',
  version: '1.0.0',

  params: {
    url: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'Dataset input URL',
    },
    apiToken: {
      type: 'string',
      required: true,
      visibility: 'user-only',
      description: 'Bright Data API token',
    },
  },

  request: {
    method: 'POST',
    url: '/api/tools/brightdata/dataset',
    headers: () => ({
      'Content-Type': 'application/json',
    }),
    // The route reads `datasetId` and `apiToken` itself and forwards every
    // other field as the dataset input record.
    body: (params: DatasetParams): Record<string, unknown> => ({
      datasetId: 'gd_lpfll7v5hcqtkxl6l',
      apiToken: params.apiToken,
      url: params.url,
    }),
  },

  /**
   * Unwraps the route reply.
   *
   * @param response - Reply from the dataset route.
   * @returns `{ success: true, output }` where `output` is the parsed JSON body.
   * @throws Error with the reply's `error` field, or a generic message when the
   * failure body is missing that field or is not JSON.
   */
  transformResponse: async (response: Response) => {
    if (!response.ok) {
      // Check the status before trusting the body: a failure reply is not
      // guaranteed to be JSON, so a parse error falls back to the generic message.
      const failure = await response.json().catch(() => null)
      throw new Error(failure?.error || 'Bright Data dataset fetch failed')
    }

    return {
      success: true,
      output: await response.json(),
    }
  },

  outputs: {
    data: {
      type: 'object',
      description: 'Structured dataset response',
    },
    snapshot_at: {
      type: 'string',
      description: 'Timestamp of data snapshot',
      optional: true,
    },
  },
}
/**
 * Bright Data Linkedin People Search dataset tool.
 *
 * Posts the input URL, first name, last name and the caller's API token,
 * together with this tool's fixed dataset ID, to the
 * `/api/tools/brightdata/dataset` route and returns the parsed JSON reply as
 * the tool output.
 */
export const datasetLinkedinPeopleSearchTool: ToolConfig<DatasetParams, DatasetResponse> = {
  id: 'brightdata_dataset_linkedin_people_search',
  name: 'Bright Data Linkedin People Search Dataset',
  description:
    'Quickly read structured linkedin people search data\nThis can be a cache lookup, so it can be more reliable than scraping',
  version: '1.0.0',

  params: {
    url: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'Dataset input URL',
    },
    first_name: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'First name',
    },
    last_name: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'Last name',
    },
    apiToken: {
      type: 'string',
      required: true,
      visibility: 'user-only',
      description: 'Bright Data API token',
    },
  },

  request: {
    method: 'POST',
    url: '/api/tools/brightdata/dataset',
    headers: () => ({
      'Content-Type': 'application/json',
    }),
    // The route reads `datasetId` and `apiToken` itself and forwards every
    // other field as the dataset input record.
    body: (params: DatasetParams): Record<string, unknown> => ({
      datasetId: 'gd_m8d03he47z8nwb5xc',
      apiToken: params.apiToken,
      url: params.url,
      first_name: params.first_name,
      last_name: params.last_name,
    }),
  },

  /**
   * Unwraps the route reply.
   *
   * @param response - Reply from the dataset route.
   * @returns `{ success: true, output }` where `output` is the parsed JSON body.
   * @throws Error with the reply's `error` field, or a generic message when the
   * failure body is missing that field or is not JSON.
   */
  transformResponse: async (response: Response) => {
    if (!response.ok) {
      // Check the status before trusting the body: a failure reply is not
      // guaranteed to be JSON, so a parse error falls back to the generic message.
      const failure = await response.json().catch(() => null)
      throw new Error(failure?.error || 'Bright Data dataset fetch failed')
    }

    return {
      success: true,
      output: await response.json(),
    }
  },

  outputs: {
    data: {
      type: 'object',
      description: 'Structured dataset response',
    },
    snapshot_at: {
      type: 'string',
      description: 'Timestamp of data snapshot',
      optional: true,
    },
  },
}
/**
 * Bright Data Linkedin Person Profile dataset tool.
 *
 * Posts the input URL and the caller's API token, together with this tool's
 * fixed dataset ID, to the `/api/tools/brightdata/dataset` route and returns
 * the parsed JSON reply as the tool output.
 */
export const datasetLinkedinPersonProfileTool: ToolConfig<DatasetParams, DatasetResponse> = {
  id: 'brightdata_dataset_linkedin_person_profile',
  name: 'Bright Data Linkedin Person Profile Dataset',
  description:
    'Quickly read structured linkedin people profile data.\nThis can be a cache lookup, so it can be more reliable than scraping',
  version: '1.0.0',

  params: {
    url: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'Dataset input URL',
    },
    apiToken: {
      type: 'string',
      required: true,
      visibility: 'user-only',
      description: 'Bright Data API token',
    },
  },

  request: {
    method: 'POST',
    url: '/api/tools/brightdata/dataset',
    headers: () => ({
      'Content-Type': 'application/json',
    }),
    // The route reads `datasetId` and `apiToken` itself and forwards every
    // other field as the dataset input record.
    body: (params: DatasetParams): Record<string, unknown> => ({
      datasetId: 'gd_l1viktl72bvl7bjuj0',
      apiToken: params.apiToken,
      url: params.url,
    }),
  },

  /**
   * Unwraps the route reply.
   *
   * @param response - Reply from the dataset route.
   * @returns `{ success: true, output }` where `output` is the parsed JSON body.
   * @throws Error with the reply's `error` field, or a generic message when the
   * failure body is missing that field or is not JSON.
   */
  transformResponse: async (response: Response) => {
    if (!response.ok) {
      // Check the status before trusting the body: a failure reply is not
      // guaranteed to be JSON, so a parse error falls back to the generic message.
      const failure = await response.json().catch(() => null)
      throw new Error(failure?.error || 'Bright Data dataset fetch failed')
    }

    return {
      success: true,
      output: await response.json(),
    }
  },

  outputs: {
    data: {
      type: 'object',
      description: 'Structured dataset response',
    },
    snapshot_at: {
      type: 'string',
      description: 'Timestamp of data snapshot',
      optional: true,
    },
  },
}
/**
 * Bright Data Linkedin Posts dataset tool.
 *
 * Posts the input URL and the caller's API token, together with this tool's
 * fixed dataset ID, to the `/api/tools/brightdata/dataset` route and returns
 * the parsed JSON reply as the tool output.
 */
export const datasetLinkedinPostsTool: ToolConfig<DatasetParams, DatasetResponse> = {
  id: 'brightdata_dataset_linkedin_posts',
  name: 'Bright Data Linkedin Posts Dataset',
  description:
    'Quickly read structured linkedin posts data\nThis can be a cache lookup, so it can be more reliable than scraping',
  version: '1.0.0',

  params: {
    url: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'Dataset input URL',
    },
    apiToken: {
      type: 'string',
      required: true,
      visibility: 'user-only',
      description: 'Bright Data API token',
    },
  },

  request: {
    method: 'POST',
    url: '/api/tools/brightdata/dataset',
    headers: () => ({
      'Content-Type': 'application/json',
    }),
    // The route reads `datasetId` and `apiToken` itself and forwards every
    // other field as the dataset input record.
    body: (params: DatasetParams): Record<string, unknown> => ({
      datasetId: 'gd_lyy3tktm25m4avu764',
      apiToken: params.apiToken,
      url: params.url,
    }),
  },

  /**
   * Unwraps the route reply.
   *
   * @param response - Reply from the dataset route.
   * @returns `{ success: true, output }` where `output` is the parsed JSON body.
   * @throws Error with the reply's `error` field, or a generic message when the
   * failure body is missing that field or is not JSON.
   */
  transformResponse: async (response: Response) => {
    if (!response.ok) {
      // Check the status before trusting the body: a failure reply is not
      // guaranteed to be JSON, so a parse error falls back to the generic message.
      const failure = await response.json().catch(() => null)
      throw new Error(failure?.error || 'Bright Data dataset fetch failed')
    }

    return {
      success: true,
      output: await response.json(),
    }
  },

  outputs: {
    data: {
      type: 'object',
      description: 'Structured dataset response',
    },
    snapshot_at: {
      type: 'string',
      description: 'Timestamp of data snapshot',
      optional: true,
    },
  },
}
/**
 * Bright Data Npm Package dataset tool.
 *
 * Posts the package name and the caller's API token, together with this
 * tool's fixed dataset ID, to the `/api/tools/brightdata/dataset` route and
 * returns the parsed JSON reply as the tool output.
 */
export const datasetNpmPackageTool: ToolConfig<DatasetParams, DatasetResponse> = {
  id: 'brightdata_dataset_npm_package',
  name: 'Bright Data Npm Package Dataset',
  description:
    'Quickly read structured npm package data.\nRequires a valid npm package name (e.g., @brightdata/sdk).\nThis can be a cache lookup, so it can be more reliable than scraping',
  version: '1.0.0',

  params: {
    package_name: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'Package name',
    },
    apiToken: {
      type: 'string',
      required: true,
      visibility: 'user-only',
      description: 'Bright Data API token',
    },
  },

  request: {
    method: 'POST',
    url: '/api/tools/brightdata/dataset',
    headers: () => ({
      'Content-Type': 'application/json',
    }),
    // The route reads `datasetId` and `apiToken` itself and forwards every
    // other field as the dataset input record.
    body: (params: DatasetParams): Record<string, unknown> => ({
      datasetId: 'gd_mk57m0301khq4jmsul',
      apiToken: params.apiToken,
      package_name: params.package_name,
    }),
  },

  /**
   * Unwraps the route reply.
   *
   * @param response - Reply from the dataset route.
   * @returns `{ success: true, output }` where `output` is the parsed JSON body.
   * @throws Error with the reply's `error` field, or a generic message when the
   * failure body is missing that field or is not JSON.
   */
  transformResponse: async (response: Response) => {
    if (!response.ok) {
      // Check the status before trusting the body: a failure reply is not
      // guaranteed to be JSON, so a parse error falls back to the generic message.
      const failure = await response.json().catch(() => null)
      throw new Error(failure?.error || 'Bright Data dataset fetch failed')
    }

    return {
      success: true,
      output: await response.json(),
    }
  },

  outputs: {
    data: {
      type: 'object',
      description: 'Structured dataset response',
    },
    snapshot_at: {
      type: 'string',
      description: 'Timestamp of data snapshot',
      optional: true,
    },
  },
}
/**
 * Bright Data Pypi Package dataset tool.
 *
 * Posts the package name and the caller's API token, together with this
 * tool's fixed dataset ID, to the `/api/tools/brightdata/dataset` route and
 * returns the parsed JSON reply as the tool output.
 */
export const datasetPypiPackageTool: ToolConfig<DatasetParams, DatasetResponse> = {
  id: 'brightdata_dataset_pypi_package',
  name: 'Bright Data Pypi Package Dataset',
  description:
    'Quickly read structured PyPI package data.\nRequires a valid PyPI package name (e.g., langchain-brightdata).\nThis can be a cache lookup, so it can be more reliable than scraping',
  version: '1.0.0',

  params: {
    package_name: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'Package name',
    },
    apiToken: {
      type: 'string',
      required: true,
      visibility: 'user-only',
      description: 'Bright Data API token',
    },
  },

  request: {
    method: 'POST',
    url: '/api/tools/brightdata/dataset',
    headers: () => ({
      'Content-Type': 'application/json',
    }),
    // The route reads `datasetId` and `apiToken` itself and forwards every
    // other field as the dataset input record.
    body: (params: DatasetParams): Record<string, unknown> => ({
      datasetId: 'gd_mk57kc3t1wwgmnepp9',
      apiToken: params.apiToken,
      package_name: params.package_name,
    }),
  },

  /**
   * Unwraps the route reply.
   *
   * @param response - Reply from the dataset route.
   * @returns `{ success: true, output }` where `output` is the parsed JSON body.
   * @throws Error with the reply's `error` field, or a generic message when the
   * failure body is missing that field or is not JSON.
   */
  transformResponse: async (response: Response) => {
    if (!response.ok) {
      // Check the status before trusting the body: a failure reply is not
      // guaranteed to be JSON, so a parse error falls back to the generic message.
      const failure = await response.json().catch(() => null)
      throw new Error(failure?.error || 'Bright Data dataset fetch failed')
    }

    return {
      success: true,
      output: await response.json(),
    }
  },

  outputs: {
    data: {
      type: 'object',
      description: 'Structured dataset response',
    },
    snapshot_at: {
      type: 'string',
      description: 'Timestamp of data snapshot',
      optional: true,
    },
  },
}
/**
 * Bright Data Reddit Posts dataset tool.
 *
 * Posts the input URL and the caller's API token, together with this tool's
 * fixed dataset ID, to the `/api/tools/brightdata/dataset` route and returns
 * the parsed JSON reply as the tool output.
 */
export const datasetRedditPostsTool: ToolConfig<DatasetParams, DatasetResponse> = {
  id: 'brightdata_dataset_reddit_posts',
  name: 'Bright Data Reddit Posts Dataset',
  description:
    'Quickly read structured reddit posts data.\nRequires a valid reddit post URL.\nThis can be a cache lookup, so it can be more reliable than scraping',
  version: '1.0.0',

  params: {
    url: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'Dataset input URL',
    },
    apiToken: {
      type: 'string',
      required: true,
      visibility: 'user-only',
      description: 'Bright Data API token',
    },
  },

  request: {
    method: 'POST',
    url: '/api/tools/brightdata/dataset',
    headers: () => ({
      'Content-Type': 'application/json',
    }),
    // The route reads `datasetId` and `apiToken` itself and forwards every
    // other field as the dataset input record.
    body: (params: DatasetParams): Record<string, unknown> => ({
      datasetId: 'gd_lvz8ah06191smkebj4',
      apiToken: params.apiToken,
      url: params.url,
    }),
  },

  /**
   * Unwraps the route reply.
   *
   * @param response - Reply from the dataset route.
   * @returns `{ success: true, output }` where `output` is the parsed JSON body.
   * @throws Error with the reply's `error` field, or a generic message when the
   * failure body is missing that field or is not JSON.
   */
  transformResponse: async (response: Response) => {
    if (!response.ok) {
      // Check the status before trusting the body: a failure reply is not
      // guaranteed to be JSON, so a parse error falls back to the generic message.
      const failure = await response.json().catch(() => null)
      throw new Error(failure?.error || 'Bright Data dataset fetch failed')
    }

    return {
      success: true,
      output: await response.json(),
    }
  },

  outputs: {
    data: {
      type: 'object',
      description: 'Structured dataset response',
    },
    snapshot_at: {
      type: 'string',
      description: 'Timestamp of data snapshot',
      optional: true,
    },
  },
}
/**
 * Bright Data Reuter News dataset tool.
 *
 * Posts the input URL and the caller's API token, together with this tool's
 * fixed dataset ID, to the `/api/tools/brightdata/dataset` route and returns
 * the parsed JSON reply as the tool output.
 */
export const datasetReuterNewsTool: ToolConfig<DatasetParams, DatasetResponse> = {
  id: 'brightdata_dataset_reuter_news',
  name: 'Bright Data Reuter News Dataset',
  description:
    'Quickly read structured reuter news data.\nRequires a valid reuter news report URL.\nThis can be a cache lookup, so it can be more reliable than scraping',
  version: '1.0.0',

  params: {
    url: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'Dataset input URL',
    },
    apiToken: {
      type: 'string',
      required: true,
      visibility: 'user-only',
      description: 'Bright Data API token',
    },
  },

  request: {
    method: 'POST',
    url: '/api/tools/brightdata/dataset',
    headers: () => ({
      'Content-Type': 'application/json',
    }),
    // The route reads `datasetId` and `apiToken` itself and forwards every
    // other field as the dataset input record.
    body: (params: DatasetParams): Record<string, unknown> => ({
      datasetId: 'gd_lyptx9h74wtlvpnfu',
      apiToken: params.apiToken,
      url: params.url,
    }),
  },

  /**
   * Unwraps the route reply.
   *
   * @param response - Reply from the dataset route.
   * @returns `{ success: true, output }` where `output` is the parsed JSON body.
   * @throws Error with the reply's `error` field, or a generic message when the
   * failure body is missing that field or is not JSON.
   */
  transformResponse: async (response: Response) => {
    if (!response.ok) {
      // Check the status before trusting the body: a failure reply is not
      // guaranteed to be JSON, so a parse error falls back to the generic message.
      const failure = await response.json().catch(() => null)
      throw new Error(failure?.error || 'Bright Data dataset fetch failed')
    }

    return {
      success: true,
      output: await response.json(),
    }
  },

  outputs: {
    data: {
      type: 'object',
      description: 'Structured dataset response',
    },
    snapshot_at: {
      type: 'string',
      description: 'Timestamp of data snapshot',
      optional: true,
    },
  },
}
/**
 * Bright Data Tiktok Comments dataset tool.
 *
 * Posts the input URL and the caller's API token, together with this tool's
 * fixed dataset ID, to the `/api/tools/brightdata/dataset` route and returns
 * the parsed JSON reply as the tool output.
 */
export const datasetTiktokCommentsTool: ToolConfig<DatasetParams, DatasetResponse> = {
  id: 'brightdata_dataset_tiktok_comments',
  name: 'Bright Data Tiktok Comments Dataset',
  description:
    'Quickly read structured Tiktok comments data.\nRequires a valid Tiktok video URL.\nThis can be a cache lookup, so it can be more reliable than scraping',
  version: '1.0.0',

  params: {
    url: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'Dataset input URL',
    },
    apiToken: {
      type: 'string',
      required: true,
      visibility: 'user-only',
      description: 'Bright Data API token',
    },
  },

  request: {
    method: 'POST',
    url: '/api/tools/brightdata/dataset',
    headers: () => ({
      'Content-Type': 'application/json',
    }),
    // The route reads `datasetId` and `apiToken` itself and forwards every
    // other field as the dataset input record.
    body: (params: DatasetParams): Record<string, unknown> => ({
      datasetId: 'gd_lkf2st302ap89utw5k',
      apiToken: params.apiToken,
      url: params.url,
    }),
  },

  /**
   * Unwraps the route reply.
   *
   * @param response - Reply from the dataset route.
   * @returns `{ success: true, output }` where `output` is the parsed JSON body.
   * @throws Error with the reply's `error` field, or a generic message when the
   * failure body is missing that field or is not JSON.
   */
  transformResponse: async (response: Response) => {
    if (!response.ok) {
      // Check the status before trusting the body: a failure reply is not
      // guaranteed to be JSON, so a parse error falls back to the generic message.
      const failure = await response.json().catch(() => null)
      throw new Error(failure?.error || 'Bright Data dataset fetch failed')
    }

    return {
      success: true,
      output: await response.json(),
    }
  },

  outputs: {
    data: {
      type: 'object',
      description: 'Structured dataset response',
    },
    snapshot_at: {
      type: 'string',
      description: 'Timestamp of data snapshot',
      optional: true,
    },
  },
}
/**
 * Bright Data Tiktok Posts dataset tool.
 *
 * Posts the input URL and the caller's API token, together with this tool's
 * fixed dataset ID, to the `/api/tools/brightdata/dataset` route and returns
 * the parsed JSON reply as the tool output.
 */
export const datasetTiktokPostsTool: ToolConfig<DatasetParams, DatasetResponse> = {
  id: 'brightdata_dataset_tiktok_posts',
  name: 'Bright Data Tiktok Posts Dataset',
  description:
    'Quickly read structured Tiktok post data.\nRequires a valid Tiktok post URL.\nThis can be a cache lookup, so it can be more reliable than scraping',
  version: '1.0.0',

  params: {
    url: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'Dataset input URL',
    },
    apiToken: {
      type: 'string',
      required: true,
      visibility: 'user-only',
      description: 'Bright Data API token',
    },
  },

  request: {
    method: 'POST',
    url: '/api/tools/brightdata/dataset',
    headers: () => ({
      'Content-Type': 'application/json',
    }),
    // The route reads `datasetId` and `apiToken` itself and forwards every
    // other field as the dataset input record.
    body: (params: DatasetParams): Record<string, unknown> => ({
      datasetId: 'gd_lu702nij2f790tmv9h',
      apiToken: params.apiToken,
      url: params.url,
    }),
  },

  /**
   * Unwraps the route reply.
   *
   * @param response - Reply from the dataset route.
   * @returns `{ success: true, output }` where `output` is the parsed JSON body.
   * @throws Error with the reply's `error` field, or a generic message when the
   * failure body is missing that field or is not JSON.
   */
  transformResponse: async (response: Response) => {
    if (!response.ok) {
      // Check the status before trusting the body: a failure reply is not
      // guaranteed to be JSON, so a parse error falls back to the generic message.
      const failure = await response.json().catch(() => null)
      throw new Error(failure?.error || 'Bright Data dataset fetch failed')
    }

    return {
      success: true,
      output: await response.json(),
    }
  },

  outputs: {
    data: {
      type: 'object',
      description: 'Structured dataset response',
    },
    snapshot_at: {
      type: 'string',
      description: 'Timestamp of data snapshot',
      optional: true,
    },
  },
}
/**
 * Bright Data Tiktok Profiles dataset tool.
 *
 * Posts the input URL and the caller's API token, together with this tool's
 * fixed dataset ID, to the `/api/tools/brightdata/dataset` route and returns
 * the parsed JSON reply as the tool output.
 */
export const datasetTiktokProfilesTool: ToolConfig<DatasetParams, DatasetResponse> = {
  id: 'brightdata_dataset_tiktok_profiles',
  name: 'Bright Data Tiktok Profiles Dataset',
  description:
    'Quickly read structured Tiktok profiles data.\nRequires a valid Tiktok profile URL.\nThis can be a cache lookup, so it can be more reliable than scraping',
  version: '1.0.0',

  params: {
    url: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'Dataset input URL',
    },
    apiToken: {
      type: 'string',
      required: true,
      visibility: 'user-only',
      description: 'Bright Data API token',
    },
  },

  request: {
    method: 'POST',
    url: '/api/tools/brightdata/dataset',
    headers: () => ({
      'Content-Type': 'application/json',
    }),
    // The route reads `datasetId` and `apiToken` itself and forwards every
    // other field as the dataset input record.
    body: (params: DatasetParams): Record<string, unknown> => ({
      datasetId: 'gd_l1villgoiiidt09ci',
      apiToken: params.apiToken,
      url: params.url,
    }),
  },

  /**
   * Unwraps the route reply.
   *
   * @param response - Reply from the dataset route.
   * @returns `{ success: true, output }` where `output` is the parsed JSON body.
   * @throws Error with the reply's `error` field, or a generic message when the
   * failure body is missing that field or is not JSON.
   */
  transformResponse: async (response: Response) => {
    if (!response.ok) {
      // Check the status before trusting the body: a failure reply is not
      // guaranteed to be JSON, so a parse error falls back to the generic message.
      const failure = await response.json().catch(() => null)
      throw new Error(failure?.error || 'Bright Data dataset fetch failed')
    }

    return {
      success: true,
      output: await response.json(),
    }
  },

  outputs: {
    data: {
      type: 'object',
      description: 'Structured dataset response',
    },
    snapshot_at: {
      type: 'string',
      description: 'Timestamp of data snapshot',
      optional: true,
    },
  },
}
/**
 * Bright Data Tiktok Shop dataset tool.
 *
 * Posts the input URL and the caller's API token, together with this tool's
 * fixed dataset ID, to the `/api/tools/brightdata/dataset` route and returns
 * the parsed JSON reply as the tool output.
 */
export const datasetTiktokShopTool: ToolConfig<DatasetParams, DatasetResponse> = {
  id: 'brightdata_dataset_tiktok_shop',
  name: 'Bright Data Tiktok Shop Dataset',
  description:
    'Quickly read structured Tiktok shop data.\nRequires a valid Tiktok shop product URL.\nThis can be a cache lookup, so it can be more reliable than scraping',
  version: '1.0.0',

  params: {
    url: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'Dataset input URL',
    },
    apiToken: {
      type: 'string',
      required: true,
      visibility: 'user-only',
      description: 'Bright Data API token',
    },
  },

  request: {
    method: 'POST',
    url: '/api/tools/brightdata/dataset',
    headers: () => ({
      'Content-Type': 'application/json',
    }),
    // The route reads `datasetId` and `apiToken` itself and forwards every
    // other field as the dataset input record.
    body: (params: DatasetParams): Record<string, unknown> => ({
      datasetId: 'gd_m45m1u911dsa4274pi',
      apiToken: params.apiToken,
      url: params.url,
    }),
  },

  /**
   * Unwraps the route reply.
   *
   * @param response - Reply from the dataset route.
   * @returns `{ success: true, output }` where `output` is the parsed JSON body.
   * @throws Error with the reply's `error` field, or a generic message when the
   * failure body is missing that field or is not JSON.
   */
  transformResponse: async (response: Response) => {
    if (!response.ok) {
      // Check the status before trusting the body: a failure reply is not
      // guaranteed to be JSON, so a parse error falls back to the generic message.
      const failure = await response.json().catch(() => null)
      throw new Error(failure?.error || 'Bright Data dataset fetch failed')
    }

    return {
      success: true,
      output: await response.json(),
    }
  },

  outputs: {
    data: {
      type: 'object',
      description: 'Structured dataset response',
    },
    snapshot_at: {
      type: 'string',
      description: 'Timestamp of data snapshot',
      optional: true,
    },
  },
}
/**
 * Bright Data Walmart Product dataset tool.
 *
 * Posts the input URL and the caller's API token, together with this tool's
 * fixed dataset ID, to the `/api/tools/brightdata/dataset` route and returns
 * the parsed JSON reply as the tool output.
 */
export const datasetWalmartProductTool: ToolConfig<DatasetParams, DatasetResponse> = {
  id: 'brightdata_dataset_walmart_product',
  name: 'Bright Data Walmart Product Dataset',
  description:
    'Quickly read structured walmart product data.\nRequires a valid product URL with /ip/ in it.\nThis can be a cache lookup, so it can be more reliable than scraping',
  version: '1.0.0',

  params: {
    url: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'Dataset input URL',
    },
    apiToken: {
      type: 'string',
      required: true,
      visibility: 'user-only',
      description: 'Bright Data API token',
    },
  },

  request: {
    method: 'POST',
    url: '/api/tools/brightdata/dataset',
    headers: () => ({
      'Content-Type': 'application/json',
    }),
    // The route reads `datasetId` and `apiToken` itself and forwards every
    // other field as the dataset input record.
    body: (params: DatasetParams): Record<string, unknown> => ({
      datasetId: 'gd_l95fol7l1ru6rlo116',
      apiToken: params.apiToken,
      url: params.url,
    }),
  },

  /**
   * Unwraps the route reply.
   *
   * @param response - Reply from the dataset route.
   * @returns `{ success: true, output }` where `output` is the parsed JSON body.
   * @throws Error with the reply's `error` field, or a generic message when the
   * failure body is missing that field or is not JSON.
   */
  transformResponse: async (response: Response) => {
    if (!response.ok) {
      // Check the status before trusting the body: a failure reply is not
      // guaranteed to be JSON, so a parse error falls back to the generic message.
      const failure = await response.json().catch(() => null)
      throw new Error(failure?.error || 'Bright Data dataset fetch failed')
    }

    return {
      success: true,
      output: await response.json(),
    }
  },

  outputs: {
    data: {
      type: 'object',
      description: 'Structured dataset response',
    },
    snapshot_at: {
      type: 'string',
      description: 'Timestamp of data snapshot',
      optional: true,
    },
  },
}
/**
 * Bright Data Walmart Seller dataset tool.
 *
 * Posts the input URL and the caller's API token, together with this tool's
 * fixed dataset ID, to the `/api/tools/brightdata/dataset` route and returns
 * the parsed JSON reply as the tool output.
 */
export const datasetWalmartSellerTool: ToolConfig<DatasetParams, DatasetResponse> = {
  id: 'brightdata_dataset_walmart_seller',
  name: 'Bright Data Walmart Seller Dataset',
  description:
    'Quickly read structured walmart seller data.\nRequires a valid walmart seller URL.\nThis can be a cache lookup, so it can be more reliable than scraping',
  version: '1.0.0',

  params: {
    url: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'Dataset input URL',
    },
    apiToken: {
      type: 'string',
      required: true,
      visibility: 'user-only',
      description: 'Bright Data API token',
    },
  },

  request: {
    method: 'POST',
    url: '/api/tools/brightdata/dataset',
    headers: () => ({
      'Content-Type': 'application/json',
    }),
    // The route reads `datasetId` and `apiToken` itself and forwards every
    // other field as the dataset input record.
    body: (params: DatasetParams): Record<string, unknown> => ({
      datasetId: 'gd_m7ke48w81ocyu4hhz0',
      apiToken: params.apiToken,
      url: params.url,
    }),
  },

  /**
   * Unwraps the route reply.
   *
   * @param response - Reply from the dataset route.
   * @returns `{ success: true, output }` where `output` is the parsed JSON body.
   * @throws Error with the reply's `error` field, or a generic message when the
   * failure body is missing that field or is not JSON.
   */
  transformResponse: async (response: Response) => {
    if (!response.ok) {
      // Check the status before trusting the body: a failure reply is not
      // guaranteed to be JSON, so a parse error falls back to the generic message.
      const failure = await response.json().catch(() => null)
      throw new Error(failure?.error || 'Bright Data dataset fetch failed')
    }

    return {
      success: true,
      output: await response.json(),
    }
  },

  outputs: {
    data: {
      type: 'object',
      description: 'Structured dataset response',
    },
    snapshot_at: {
      type: 'string',
      description: 'Timestamp of data snapshot',
      optional: true,
    },
  },
}
/**
 * Bright Data X Posts dataset tool.
 *
 * Posts the input URL and the caller's API token, together with this tool's
 * fixed dataset ID, to the `/api/tools/brightdata/dataset` route and returns
 * the parsed JSON reply as the tool output.
 */
export const datasetXPostsTool: ToolConfig<DatasetParams, DatasetResponse> = {
  id: 'brightdata_dataset_x_posts',
  name: 'Bright Data X Posts Dataset',
  description:
    'Quickly read structured X post data.\nRequires a valid X post URL.\nThis can be a cache lookup, so it can be more reliable than scraping',
  version: '1.0.0',

  params: {
    url: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'Dataset input URL',
    },
    apiToken: {
      type: 'string',
      required: true,
      visibility: 'user-only',
      description: 'Bright Data API token',
    },
  },

  request: {
    method: 'POST',
    url: '/api/tools/brightdata/dataset',
    headers: () => ({
      'Content-Type': 'application/json',
    }),
    // The route reads `datasetId` and `apiToken` itself and forwards every
    // other field as the dataset input record.
    body: (params: DatasetParams): Record<string, unknown> => ({
      datasetId: 'gd_lwxkxvnf1cynvib9co',
      apiToken: params.apiToken,
      url: params.url,
    }),
  },

  /**
   * Unwraps the route reply.
   *
   * @param response - Reply from the dataset route.
   * @returns `{ success: true, output }` where `output` is the parsed JSON body.
   * @throws Error with the reply's `error` field, or a generic message when the
   * failure body is missing that field or is not JSON.
   */
  transformResponse: async (response: Response) => {
    if (!response.ok) {
      // Check the status before trusting the body: a failure reply is not
      // guaranteed to be JSON, so a parse error falls back to the generic message.
      const failure = await response.json().catch(() => null)
      throw new Error(failure?.error || 'Bright Data dataset fetch failed')
    }

    return {
      success: true,
      output: await response.json(),
    }
  },

  outputs: {
    data: {
      type: 'object',
      description: 'Structured dataset response',
    },
    snapshot_at: {
      type: 'string',
      description: 'Timestamp of data snapshot',
      optional: true,
    },
  },
}
/**
 * Bright Data Yahoo Finance Business dataset tool.
 *
 * Posts the input URL and the caller's API token, together with this tool's
 * fixed dataset ID, to the `/api/tools/brightdata/dataset` route and returns
 * the parsed JSON reply as the tool output.
 */
export const datasetYahooFinanceBusinessTool: ToolConfig<DatasetParams, DatasetResponse> = {
  id: 'brightdata_dataset_yahoo_finance_business',
  name: 'Bright Data Yahoo Finance Business Dataset',
  description:
    'Quickly read structured yahoo finance business data.\nRequires a valid yahoo finance business URL.\nThis can be a cache lookup, so it can be more reliable than scraping',
  version: '1.0.0',

  params: {
    url: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'Dataset input URL',
    },
    apiToken: {
      type: 'string',
      required: true,
      visibility: 'user-only',
      description: 'Bright Data API token',
    },
  },

  request: {
    method: 'POST',
    url: '/api/tools/brightdata/dataset',
    headers: () => ({
      'Content-Type': 'application/json',
    }),
    // The route reads `datasetId` and `apiToken` itself and forwards every
    // other field as the dataset input record.
    body: (params: DatasetParams): Record<string, unknown> => ({
      datasetId: 'gd_lmrpz3vxmz972ghd7',
      apiToken: params.apiToken,
      url: params.url,
    }),
  },

  /**
   * Unwraps the route reply.
   *
   * @param response - Reply from the dataset route.
   * @returns `{ success: true, output }` where `output` is the parsed JSON body.
   * @throws Error with the reply's `error` field, or a generic message when the
   * failure body is missing that field or is not JSON.
   */
  transformResponse: async (response: Response) => {
    if (!response.ok) {
      // Check the status before trusting the body: a failure reply is not
      // guaranteed to be JSON, so a parse error falls back to the generic message.
      const failure = await response.json().catch(() => null)
      throw new Error(failure?.error || 'Bright Data dataset fetch failed')
    }

    return {
      success: true,
      output: await response.json(),
    }
  },

  outputs: {
    data: {
      type: 'object',
      description: 'Structured dataset response',
    },
    snapshot_at: {
      type: 'string',
      description: 'Timestamp of data snapshot',
      optional: true,
    },
  },
}
/**
 * Bright Data Youtube Comments dataset tool.
 *
 * Posts the input URL, the requested comment count and the caller's API
 * token, together with this tool's fixed dataset ID, to the
 * `/api/tools/brightdata/dataset` route and returns the parsed JSON reply as
 * the tool output.
 */
export const datasetYoutubeCommentsTool: ToolConfig<DatasetParams, DatasetResponse> = {
  id: 'brightdata_dataset_youtube_comments',
  name: 'Bright Data Youtube Comments Dataset',
  description:
    'Quickly read structured youtube comments data.\nRequires a valid youtube video URL.\nThis can be a cache lookup, so it can be more reliable than scraping',
  version: '1.0.0',

  params: {
    url: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'Dataset input URL',
    },
    num_of_comments: {
      type: 'string',
      required: false,
      visibility: 'user-or-llm',
      description: 'Number of comments to fetch (default: 10)',
    },
    apiToken: {
      type: 'string',
      required: true,
      visibility: 'user-only',
      description: 'Bright Data API token',
    },
  },

  request: {
    method: 'POST',
    url: '/api/tools/brightdata/dataset',
    headers: () => ({
      'Content-Type': 'application/json',
    }),
    // The route reads `datasetId` and `apiToken` itself and forwards every
    // other field as the dataset input record.
    body: (params: DatasetParams): Record<string, unknown> => ({
      datasetId: 'gd_lk9q0ew71spt1mxywf',
      apiToken: params.apiToken,
      url: params.url,
      // Apply the documented default of 10 when the caller leaves the count unset.
      num_of_comments: params.num_of_comments ?? '10',
    }),
  },

  /**
   * Unwraps the route reply.
   *
   * @param response - Reply from the dataset route.
   * @returns `{ success: true, output }` where `output` is the parsed JSON body.
   * @throws Error with the reply's `error` field, or a generic message when the
   * failure body is missing that field or is not JSON.
   */
  transformResponse: async (response: Response) => {
    if (!response.ok) {
      // Check the status before trusting the body: a failure reply is not
      // guaranteed to be JSON, so a parse error falls back to the generic message.
      const failure = await response.json().catch(() => null)
      throw new Error(failure?.error || 'Bright Data dataset fetch failed')
    }

    return {
      success: true,
      output: await response.json(),
    }
  },

  outputs: {
    data: {
      type: 'object',
      description: 'Structured dataset response',
    },
    snapshot_at: {
      type: 'string',
      description: 'Timestamp of data snapshot',
      optional: true,
    },
  },
}
from '@/tools/types'
+
+/**
+ * Bright Data Youtube Profiles dataset tool.
+ */
+export const datasetYoutubeProfilesTool: ToolConfig<DatasetParams, DatasetResponse> = {
+  id: 'brightdata_dataset_youtube_profiles',
+  name: 'Bright Data Youtube Profiles Dataset',
+  description:
+    'Quickly read structured youtube profiles data.\nRequires a valid youtube profile URL.\nThis can be a cache lookup, so it can be more reliable than scraping',
+  version: '1.0.0',
+
+  params: {
+    url: {
+      type: 'string',
+      required: true,
+      visibility: 'user-or-llm',
+      description: 'Dataset input URL',
+    },
+    apiToken: {
+      type: 'string',
+      required: true,
+      visibility: 'user-only',
+      description: 'Bright Data API token',
+    },
+  },
+
+  request: {
+    method: 'POST',
+    url: '/api/tools/brightdata/dataset', // route triggers the dataset, then polls its snapshot
+    headers: () => ({
+      'Content-Type': 'application/json',
+    }),
+    body: (params) => {
+      const body: Record<string, unknown> = {
+        datasetId: 'gd_lk538t2k2p1k3oos71', // sent upstream as the dataset_id query parameter
+        apiToken: params.apiToken, // used by the route as the upstream Bearer token
+        url: params.url, // remaining keys are forwarded as the dataset input record
+      }
+
+      return body
+    },
+  },
+
+  transformResponse: async (response: Response) => {
+    const data = await response.json()
+
+    if (!response.ok) {
+      throw new Error(data.error || 'Bright Data dataset fetch failed') // route errors: { error }
+    }
+
+    return {
+      success: true,
+      output: data,
+    }
+  },
+
+  outputs: {
+    data: {
+      type: 'object',
+      description: 'Structured dataset response',
+    },
+    snapshot_at: {
+      type: 'string',
+      description: 'Timestamp of data snapshot',
+      optional: true,
+    },
+  },
+}
diff --git a/apps/sim/tools/brightdata/dataset_youtube_videos.ts b/apps/sim/tools/brightdata/dataset_youtube_videos.ts
new file mode 100644
index 0000000000..5308bdcfe0
--- /dev/null
+++ b/apps/sim/tools/brightdata/dataset_youtube_videos.ts
@@ -0,0 +1,70 @@
+import type { DatasetParams, DatasetResponse } from '@/tools/brightdata/types'
+import type { ToolConfig } from '@/tools/types'
+
+/**
+ * Bright Data Youtube Videos dataset tool.
+ */
+export const datasetYoutubeVideosTool: ToolConfig<DatasetParams, DatasetResponse> = {
+  id: 'brightdata_dataset_youtube_videos',
+  name: 'Bright Data Youtube Videos Dataset',
+  description:
+    'Quickly read structured YouTube videos data.\nRequires a valid YouTube video URL.\nThis can be a cache lookup, so it can be more reliable than scraping',
+  version: '1.0.0',
+
+  params: {
+    url: {
+      type: 'string',
+      required: true,
+      visibility: 'user-or-llm',
+      description: 'Dataset input URL',
+    },
+    apiToken: {
+      type: 'string',
+      required: true,
+      visibility: 'user-only',
+      description: 'Bright Data API token',
+    },
+  },
+
+  request: {
+    method: 'POST',
+    url: '/api/tools/brightdata/dataset', // route triggers the dataset, then polls its snapshot
+    headers: () => ({
+      'Content-Type': 'application/json',
+    }),
+    body: (params) => {
+      const body: Record<string, unknown> = {
+        datasetId: 'gd_lk56epmy2i5g7lzu0k', // sent upstream as the dataset_id query parameter
+        apiToken: params.apiToken, // used by the route as the upstream Bearer token
+        url: params.url, // remaining keys are forwarded as the dataset input record
+      }
+
+      return body
+    },
+  },
+
+  transformResponse: async (response: Response) => {
+    const data = await response.json()
+
+    if (!response.ok) {
+      throw new Error(data.error || 'Bright Data dataset fetch failed') // route errors: { error }
+    }
+
+    return {
+      success: true,
+      output: data,
+    }
+  },
+
+  outputs: {
+    data: {
+      type: 'object',
+      description: 'Structured dataset response',
+    },
+    snapshot_at: {
+      type: 'string',
+      description: 'Timestamp of data snapshot',
+      optional: true,
+    },
+  },
+}
diff --git a/apps/sim/tools/brightdata/dataset_zara_products.ts b/apps/sim/tools/brightdata/dataset_zara_products.ts
new file mode 100644
index 0000000000..536e633c2e
--- /dev/null
+++ b/apps/sim/tools/brightdata/dataset_zara_products.ts
@@ -0,0 +1,70 @@
+import type { DatasetParams, DatasetResponse } from '@/tools/brightdata/types'
+import type { ToolConfig } from '@/tools/types'
+
+/**
+ * Bright Data Zara Products dataset tool.
+ */
+export const datasetZaraProductsTool: ToolConfig<DatasetParams, DatasetResponse> = {
+  id: 'brightdata_dataset_zara_products',
+  name: 'Bright Data Zara Products Dataset',
+  description:
+    'Quickly read structured zara product data.\nRequires a valid zara product URL.\nThis can be a cache lookup, so it can be more reliable than scraping',
+  version: '1.0.0',
+
+  params: {
+    url: {
+      type: 'string',
+      required: true,
+      visibility: 'user-or-llm',
+      description: 'Dataset input URL',
+    },
+    apiToken: {
+      type: 'string',
+      required: true,
+      visibility: 'user-only',
+      description: 'Bright Data API token',
+    },
+  },
+
+  request: {
+    method: 'POST',
+    url: '/api/tools/brightdata/dataset', // route triggers the dataset, then polls its snapshot
+    headers: () => ({
+      'Content-Type': 'application/json',
+    }),
+    body: (params) => {
+      const body: Record<string, unknown> = {
+        datasetId: 'gd_lct4vafw1tgx27d4o0', // sent upstream as the dataset_id query parameter
+        apiToken: params.apiToken, // used by the route as the upstream Bearer token
+        url: params.url, // remaining keys are forwarded as the dataset input record
+      }
+
+      return body
+    },
+  },
+
+  transformResponse: async (response: Response) => {
+    const data = await response.json()
+
+    if (!response.ok) {
+      throw new Error(data.error || 'Bright Data dataset fetch failed') // route errors: { error }
+    }
+
+    return {
+      success: true,
+      output: data,
+    }
+  },
+
+  outputs: {
+    data: {
+      type: 'object',
+      description: 'Structured dataset response',
+    },
+    snapshot_at: {
+      type: 'string',
+      description: 'Timestamp of data snapshot',
+      optional: true,
+    },
+  },
+}
diff --git a/apps/sim/tools/brightdata/dataset_zillow_properties_listing.ts b/apps/sim/tools/brightdata/dataset_zillow_properties_listing.ts
new file mode 100644
index 0000000000..90aa09a0f5
--- /dev/null
+++ b/apps/sim/tools/brightdata/dataset_zillow_properties_listing.ts
@@ -0,0 +1,70 @@
+import type { DatasetParams, DatasetResponse } from '@/tools/brightdata/types'
+import type { ToolConfig } from '@/tools/types'
+
+/**
+ * Bright Data Zillow Properties Listing dataset tool.
+ */
+export const datasetZillowPropertiesListingTool: ToolConfig<DatasetParams, DatasetResponse> = {
+  id: 'brightdata_dataset_zillow_properties_listing',
+  name: 'Bright Data Zillow Properties Listing Dataset',
+  description:
+    'Quickly read structured zillow properties listing data.\nRequires a valid zillow properties listing URL.\nThis can be a cache lookup, so it can be more reliable than scraping',
+  version: '1.0.0',
+
+  params: {
+    url: {
+      type: 'string',
+      required: true,
+      visibility: 'user-or-llm',
+      description: 'Dataset input URL',
+    },
+    apiToken: {
+      type: 'string',
+      required: true,
+      visibility: 'user-only',
+      description: 'Bright Data API token',
+    },
+  },
+
+  request: {
+    method: 'POST',
+    url: '/api/tools/brightdata/dataset', // route triggers the dataset, then polls its snapshot
+    headers: () => ({
+      'Content-Type': 'application/json',
+    }),
+    body: (params) => {
+      const body: Record<string, unknown> = {
+        datasetId: 'gd_lfqkr8wm13ixtbd8f5', // sent upstream as the dataset_id query parameter
+        apiToken: params.apiToken, // used by the route as the upstream Bearer token
+        url: params.url, // remaining keys are forwarded as the dataset input record
+      }
+
+      return body
+    },
+  },
+
+  transformResponse: async (response: Response) => {
+    const data = await response.json()
+
+    if (!response.ok) {
+      throw new Error(data.error || 'Bright Data dataset fetch failed') // route errors: { error }
+    }
+
+    return {
+      success: true,
+      output: data,
+    }
+  },
+
+  outputs: {
+    data: {
+      type: 'object',
+      description: 'Structured dataset response',
+    },
+    snapshot_at: {
+      type: 'string',
+      description: 'Timestamp of data snapshot',
+      optional: true,
+    },
+  },
+}
diff --git a/apps/sim/tools/brightdata/dataset_zoominfo_company_profile.ts b/apps/sim/tools/brightdata/dataset_zoominfo_company_profile.ts
new file mode 100644
index 0000000000..c1cae34157
--- /dev/null
+++ b/apps/sim/tools/brightdata/dataset_zoominfo_company_profile.ts
@@ -0,0 +1,70 @@
+import type { DatasetParams, DatasetResponse } from '@/tools/brightdata/types'
+import type { ToolConfig } from '@/tools/types'
+
+/**
+ * Bright Data Zoominfo Company Profile dataset tool.
+ */
+export const datasetZoominfoCompanyProfileTool: ToolConfig<DatasetParams, DatasetResponse> = {
+  id: 'brightdata_dataset_zoominfo_company_profile',
+  name: 'Bright Data Zoominfo Company Profile Dataset',
+  description:
+    'Quickly read structured ZoomInfo company profile data.\nRequires a valid ZoomInfo company URL.\nThis can be a cache lookup, so it can be more reliable than scraping',
+  version: '1.0.0',
+
+  params: {
+    url: {
+      type: 'string',
+      required: true,
+      visibility: 'user-or-llm',
+      description: 'Dataset input URL',
+    },
+    apiToken: {
+      type: 'string',
+      required: true,
+      visibility: 'user-only',
+      description: 'Bright Data API token',
+    },
+  },
+
+  request: {
+    method: 'POST',
+    url: '/api/tools/brightdata/dataset', // route triggers the dataset, then polls its snapshot
+    headers: () => ({
+      'Content-Type': 'application/json',
+    }),
+    body: (params) => {
+      const body: Record<string, unknown> = {
+        datasetId: 'gd_m0ci4a4ivx3j5l6nx', // sent upstream as the dataset_id query parameter
+        apiToken: params.apiToken, // used by the route as the upstream Bearer token
+        url: params.url, // remaining keys are forwarded as the dataset input record
+      }
+
+      return body
+    },
+  },
+
+  transformResponse: async (response: Response) => {
+    const data = await response.json()
+
+    if (!response.ok) {
+      throw new Error(data.error || 'Bright Data dataset fetch failed') // route errors: { error }
+    }
+
+    return {
+      success: true,
+      output: data,
+    }
+  },
+
+  outputs: {
+    data: {
+      type: 'object',
+      description: 'Structured dataset response',
+    },
+    snapshot_at: {
+      type: 'string',
+      description: 'Timestamp of data snapshot',
+      optional: true,
+    },
+  },
+}
diff --git a/apps/sim/tools/brightdata/index.ts b/apps/sim/tools/brightdata/index.ts
new file mode 100644
index 0000000000..ced9f57611
--- /dev/null
+++ b/apps/sim/tools/brightdata/index.ts
@@ -0,0 +1,48 @@
+export { datasetAmazonProductTool } from './dataset_amazon_product'
+export { datasetAmazonProductReviewsTool } from './dataset_amazon_product_reviews'
+export { datasetAmazonProductSearchTool } from './dataset_amazon_product_search'
+export { datasetAppleAppStoreTool } from './dataset_apple_app_store'
+export { datasetBestbuyProductsTool } from './dataset_bestbuy_products'
+export {
datasetBookingHotelListingsTool } from './dataset_booking_hotel_listings' +export { datasetCrunchbaseCompanyTool } from './dataset_crunchbase_company' +export { datasetEbayProductTool } from './dataset_ebay_product' +export { datasetEtsyProductsTool } from './dataset_etsy_products' +export { datasetFacebookCompanyReviewsTool } from './dataset_facebook_company_reviews' +export { datasetFacebookEventsTool } from './dataset_facebook_events' +export { datasetFacebookMarketplaceListingsTool } from './dataset_facebook_marketplace_listings' +export { datasetFacebookPostsTool } from './dataset_facebook_posts' +export { datasetGithubRepositoryFileTool } from './dataset_github_repository_file' +export { datasetGoogleMapsReviewsTool } from './dataset_google_maps_reviews' +export { datasetGooglePlayStoreTool } from './dataset_google_play_store' +export { datasetGoogleShoppingTool } from './dataset_google_shopping' +export { datasetHomedepotProductsTool } from './dataset_homedepot_products' +export { datasetInstagramCommentsTool } from './dataset_instagram_comments' +export { datasetInstagramPostsTool } from './dataset_instagram_posts' +export { datasetInstagramProfilesTool } from './dataset_instagram_profiles' +export { datasetInstagramReelsTool } from './dataset_instagram_reels' +export { datasetLinkedinCompanyProfileTool } from './dataset_linkedin_company_profile' +export { datasetLinkedinJobListingsTool } from './dataset_linkedin_job_listings' +export { datasetLinkedinPeopleSearchTool } from './dataset_linkedin_people_search' +export { datasetLinkedinPersonProfileTool } from './dataset_linkedin_person_profile' +export { datasetLinkedinPostsTool } from './dataset_linkedin_posts' +export { datasetNpmPackageTool } from './dataset_npm_package' +export { datasetPypiPackageTool } from './dataset_pypi_package' +export { datasetRedditPostsTool } from './dataset_reddit_posts' +export { datasetReuterNewsTool } from './dataset_reuter_news' +export { datasetTiktokCommentsTool } from 
'./dataset_tiktok_comments' +export { datasetTiktokPostsTool } from './dataset_tiktok_posts' +export { datasetTiktokProfilesTool } from './dataset_tiktok_profiles' +export { datasetTiktokShopTool } from './dataset_tiktok_shop' +export { datasetWalmartProductTool } from './dataset_walmart_product' +export { datasetWalmartSellerTool } from './dataset_walmart_seller' +export { datasetXPostsTool } from './dataset_x_posts' +export { datasetYahooFinanceBusinessTool } from './dataset_yahoo_finance_business' +export { datasetYoutubeCommentsTool } from './dataset_youtube_comments' +export { datasetYoutubeProfilesTool } from './dataset_youtube_profiles' +export { datasetYoutubeVideosTool } from './dataset_youtube_videos' +export { datasetZaraProductsTool } from './dataset_zara_products' +export { datasetZillowPropertiesListingTool } from './dataset_zillow_properties_listing' +export { datasetZoominfoCompanyProfileTool } from './dataset_zoominfo_company_profile' +export { scrapeMarkdownTool } from './scrape_markdown' +export { searchEngineTool } from './search_engine' +export * from './types' diff --git a/apps/sim/tools/brightdata/scrape_markdown.ts b/apps/sim/tools/brightdata/scrape_markdown.ts new file mode 100644 index 0000000000..bb1e20cea7 --- /dev/null +++ b/apps/sim/tools/brightdata/scrape_markdown.ts @@ -0,0 +1,75 @@ +import type { ScrapeMarkdownParams, ScrapeMarkdownResponse } from '@/tools/brightdata/types' +import type { ToolConfig } from '@/tools/types' + +/** + * Bright Data tool for scraping a URL into markdown. 
+ */
+export const scrapeMarkdownTool: ToolConfig<ScrapeMarkdownParams, ScrapeMarkdownResponse> = {
+  id: 'brightdata_scrape_markdown',
+  name: 'Bright Data Scrape as Markdown',
+  description: 'Scrape any website and convert it to clean markdown format using Bright Data',
+  version: '1.0.0',
+
+  params: {
+    url: {
+      type: 'string',
+      required: true,
+      visibility: 'user-or-llm',
+      description: 'The URL to scrape',
+    },
+    apiToken: {
+      type: 'string',
+      required: true,
+      visibility: 'user-only',
+      description: 'Bright Data API token',
+    },
+    unlockerZone: {
+      type: 'string',
+      required: false,
+      visibility: 'user-only',
+      description: 'Bright Data unlocker zone name (default: mcp_unlocker)',
+    },
+  },
+
+  request: {
+    method: 'POST',
+    url: '/api/tools/brightdata/scrape-markdown', // internal API route; the token travels in the body
+    headers: () => ({
+      'Content-Type': 'application/json',
+    }),
+    body: (params) => ({
+      url: params.url,
+      apiToken: params.apiToken,
+      unlockerZone: params.unlockerZone || 'mcp_unlocker', // blank or missing zone -> default zone
+    }),
+  },
+
+  transformResponse: async (response: Response) => {
+    const data = await response.json()
+
+    if (!response.ok) {
+      throw new Error(data.error || 'Bright Data scrape failed') // prefer the route's own message
+    }
+
+    return {
+      success: true,
+      output: data,
+    }
+  },
+
+  outputs: {
+    markdown: {
+      type: 'string',
+      description: 'The scraped content in markdown format',
+    },
+    url: {
+      type: 'string',
+      description: 'The URL that was scraped',
+    },
+    title: {
+      type: 'string',
+      description: 'The page title',
+      optional: true,
+    },
+  },
+}
diff --git a/apps/sim/tools/brightdata/search_engine.ts b/apps/sim/tools/brightdata/search_engine.ts
new file mode 100644
index 0000000000..ff235907e1
--- /dev/null
+++ b/apps/sim/tools/brightdata/search_engine.ts
@@ -0,0 +1,73 @@
+import type { SearchEngineParams, SearchEngineResponse } from '@/tools/brightdata/types'
+import type { ToolConfig } from '@/tools/types'
+
+/**
+ * Bright Data search engine tool.
+ */
+export const searchEngineTool: ToolConfig<SearchEngineParams, SearchEngineResponse> = {
+  id: 'brightdata_search_engine',
+  name: 'Bright Data Search Engine',
+  description: 'Search the web using Bright Data search engine',
+  version: '1.0.0',
+
+  params: {
+    query: {
+      type: 'string',
+      required: true,
+      visibility: 'user-or-llm',
+      description: 'Search query',
+    },
+    maxResults: {
+      type: 'number',
+      required: false,
+      visibility: 'user-only',
+      description: 'Maximum number of results to return (default: 10)',
+    },
+    apiToken: {
+      type: 'string',
+      required: true,
+      visibility: 'user-only',
+      description: 'Bright Data API token',
+    },
+    unlockerZone: {
+      type: 'string',
+      required: false,
+      visibility: 'user-only',
+      description: 'Bright Data unlocker zone name (default: mcp_unlocker)',
+    },
+  },
+
+  request: {
+    method: 'POST',
+    url: '/api/tools/brightdata/search-engine', // internal API route; the token travels in the body
+    headers: () => ({
+      'Content-Type': 'application/json',
+    }),
+    body: (params) => ({
+      query: params.query,
+      maxResults: params.maxResults || 10, // unset or 0 falls back to the documented default
+      apiToken: params.apiToken,
+      unlockerZone: params.unlockerZone || 'mcp_unlocker', // blank or missing zone -> default zone
+    }),
+  },
+
+  transformResponse: async (response: Response) => {
+    const data = await response.json()
+
+    if (!response.ok) {
+      throw new Error(data.error || 'Bright Data search failed') // prefer the route's own message
+    }
+
+    return {
+      success: true,
+      output: data,
+    }
+  },
+
+  outputs: {
+    results: {
+      type: 'array',
+      description: 'Search results with title, URL, and snippet',
+    },
+  },
+}
diff --git a/apps/sim/tools/brightdata/types.ts b/apps/sim/tools/brightdata/types.ts
new file mode 100644
index 0000000000..cf68b0fce0
--- /dev/null
+++ b/apps/sim/tools/brightdata/types.ts
@@ -0,0 +1,94 @@
+import type { ToolResponse } from '@/tools/types'
+
+export interface BrightDataAuthParams {
+  apiToken: string // sent by each tool in the JSON body of its internal route request
+  unlockerZone?: string // the scrape and search tools fall back to 'mcp_unlocker'
+  browserZone?: string // NOTE(review): not read by the scrape/search tools — confirm intended use
+}
+
+export interface ScrapeMarkdownParams extends BrightDataAuthParams {
+  url: string
+}
+
+export interface ScrapeMarkdownResponse extends ToolResponse {
+  output: {
+    markdown: string
+    url: string
+    title?: string
+  }
+}
+
+export interface SearchEngineParams extends BrightDataAuthParams {
+  query: string
+  maxResults?: number // searchEngineTool sends 10 when this is unset or 0
+}
+
+export interface SearchEngineResponse extends ToolResponse {
+  output: {
+    results: Array<{
+      title: string
+      url: string
+      snippet: string
+    }>
+  }
+}
+
+// NOTE(review): the Browser* types below have no matching tool exported from index.ts in this
+export interface BrowserNavigateParams extends BrightDataAuthParams {
+  url: string
+  sessionKey?: string
+  country?: string
+}
+
+export interface BrowserNavigateResponse extends ToolResponse {
+  output: {
+    success: boolean
+    url: string
+    title?: string
+    sessionKey?: string
+  }
+}
+
+export interface BrowserSnapshotParams extends BrightDataAuthParams {
+  sessionKey?: string
+}
+
+export interface BrowserSnapshotResponse extends ToolResponse {
+  output: {
+    html: string
+    url: string
+    title?: string
+  }
+}
+
+export interface BrowserScreenshotParams extends BrightDataAuthParams {
+  sessionKey?: string
+}
+
+export interface BrowserScreenshotResponse extends ToolResponse {
+  output: {
+    image: string
+    url: string
+  }
+}
+
+export interface DatasetParams extends BrightDataAuthParams {
+  datasetId: string // the dataset route sends this upstream as the dataset_id query parameter
+  url?: string
+  keyword?: string
+  [key: string]: unknown // dataset-specific inputs, e.g. num_of_comments for YouTube comments
+}
+
+export interface DatasetResponse extends ToolResponse {
+  output: {
+    data: unknown
+    snapshot_at?: string
+  }
+}
+
+export type BrightDataResponse = // union of every response shape declared in this file
+  | ScrapeMarkdownResponse
+  | SearchEngineResponse
+  | BrowserNavigateResponse
+  | BrowserSnapshotResponse
+  | BrowserScreenshotResponse
+  | DatasetResponse
diff --git a/apps/sim/tools/registry.ts b/apps/sim/tools/registry.ts
index 6570eea636..0e6514e08b 100644
--- a/apps/sim/tools/registry.ts
+++ b/apps/sim/tools/registry.ts
@@ -62,6 +62,55 @@ import {
   asanaSearchTasksTool,
   asanaUpdateTaskTool,
 } from '@/tools/asana'
+import {
+  datasetAmazonProductReviewsTool,
+  datasetAmazonProductSearchTool,
+  datasetAmazonProductTool,
+  datasetAppleAppStoreTool,
+  datasetBestbuyProductsTool,
+  datasetBookingHotelListingsTool,
+  datasetCrunchbaseCompanyTool,
+
datasetEbayProductTool, + datasetEtsyProductsTool, + datasetFacebookCompanyReviewsTool, + datasetFacebookEventsTool, + datasetFacebookMarketplaceListingsTool, + datasetFacebookPostsTool, + datasetGithubRepositoryFileTool, + datasetGoogleMapsReviewsTool, + datasetGooglePlayStoreTool, + datasetGoogleShoppingTool, + datasetHomedepotProductsTool, + datasetInstagramCommentsTool, + datasetInstagramPostsTool, + datasetInstagramProfilesTool, + datasetInstagramReelsTool, + datasetLinkedinCompanyProfileTool, + datasetLinkedinJobListingsTool, + datasetLinkedinPeopleSearchTool, + datasetLinkedinPersonProfileTool, + datasetLinkedinPostsTool, + datasetNpmPackageTool, + datasetPypiPackageTool, + datasetRedditPostsTool, + datasetReuterNewsTool, + datasetTiktokCommentsTool, + datasetTiktokPostsTool, + datasetTiktokProfilesTool, + datasetTiktokShopTool, + datasetWalmartProductTool, + datasetWalmartSellerTool, + datasetXPostsTool, + datasetYahooFinanceBusinessTool, + datasetYoutubeCommentsTool, + datasetYoutubeProfilesTool, + datasetYoutubeVideosTool, + datasetZaraProductsTool, + datasetZillowPropertiesListingTool, + datasetZoominfoCompanyProfileTool, + scrapeMarkdownTool, + searchEngineTool, +} from '@/tools/brightdata' import { browserUseRunTaskTool } from '@/tools/browser_use' import { calendlyCancelEventTool, @@ -1552,6 +1601,53 @@ export const tools: Record = { asana_get_projects: asanaGetProjectsTool, asana_search_tasks: asanaSearchTasksTool, asana_add_comment: asanaAddCommentTool, + brightdata_dataset_amazon_product: datasetAmazonProductTool, + brightdata_dataset_amazon_product_reviews: datasetAmazonProductReviewsTool, + brightdata_dataset_amazon_product_search: datasetAmazonProductSearchTool, + brightdata_dataset_walmart_product: datasetWalmartProductTool, + brightdata_dataset_walmart_seller: datasetWalmartSellerTool, + brightdata_dataset_ebay_product: datasetEbayProductTool, + brightdata_dataset_homedepot_products: datasetHomedepotProductsTool, + 
brightdata_dataset_zara_products: datasetZaraProductsTool, + brightdata_dataset_etsy_products: datasetEtsyProductsTool, + brightdata_dataset_bestbuy_products: datasetBestbuyProductsTool, + brightdata_dataset_linkedin_person_profile: datasetLinkedinPersonProfileTool, + brightdata_dataset_linkedin_company_profile: datasetLinkedinCompanyProfileTool, + brightdata_dataset_linkedin_job_listings: datasetLinkedinJobListingsTool, + brightdata_dataset_linkedin_posts: datasetLinkedinPostsTool, + brightdata_dataset_linkedin_people_search: datasetLinkedinPeopleSearchTool, + brightdata_dataset_crunchbase_company: datasetCrunchbaseCompanyTool, + brightdata_dataset_zoominfo_company_profile: datasetZoominfoCompanyProfileTool, + brightdata_dataset_instagram_profiles: datasetInstagramProfilesTool, + brightdata_dataset_instagram_posts: datasetInstagramPostsTool, + brightdata_dataset_instagram_reels: datasetInstagramReelsTool, + brightdata_dataset_instagram_comments: datasetInstagramCommentsTool, + brightdata_dataset_facebook_posts: datasetFacebookPostsTool, + brightdata_dataset_facebook_marketplace_listings: datasetFacebookMarketplaceListingsTool, + brightdata_dataset_facebook_company_reviews: datasetFacebookCompanyReviewsTool, + brightdata_dataset_facebook_events: datasetFacebookEventsTool, + brightdata_dataset_tiktok_profiles: datasetTiktokProfilesTool, + brightdata_dataset_tiktok_posts: datasetTiktokPostsTool, + brightdata_dataset_tiktok_shop: datasetTiktokShopTool, + brightdata_dataset_tiktok_comments: datasetTiktokCommentsTool, + brightdata_dataset_google_maps_reviews: datasetGoogleMapsReviewsTool, + brightdata_dataset_google_shopping: datasetGoogleShoppingTool, + brightdata_dataset_google_play_store: datasetGooglePlayStoreTool, + brightdata_dataset_apple_app_store: datasetAppleAppStoreTool, + brightdata_dataset_reuter_news: datasetReuterNewsTool, + brightdata_dataset_github_repository_file: datasetGithubRepositoryFileTool, + brightdata_dataset_yahoo_finance_business: 
datasetYahooFinanceBusinessTool, + brightdata_dataset_x_posts: datasetXPostsTool, + brightdata_dataset_zillow_properties_listing: datasetZillowPropertiesListingTool, + brightdata_dataset_booking_hotel_listings: datasetBookingHotelListingsTool, + brightdata_dataset_youtube_profiles: datasetYoutubeProfilesTool, + brightdata_dataset_youtube_comments: datasetYoutubeCommentsTool, + brightdata_dataset_reddit_posts: datasetRedditPostsTool, + brightdata_dataset_youtube_videos: datasetYoutubeVideosTool, + brightdata_dataset_npm_package: datasetNpmPackageTool, + brightdata_dataset_pypi_package: datasetPypiPackageTool, + brightdata_scrape_markdown: scrapeMarkdownTool, + brightdata_search_engine: searchEngineTool, browser_use_run_task: browserUseRunTaskTool, openai_embeddings: openAIEmbeddingsTool, http_request: httpRequestTool,