Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@ WORKDIR /app
COPY package.json yarn.lock* ./
COPY ./packages/db ./packages/db
COPY ./packages/schemas ./packages/schemas
COPY ./packages/crypto ./packages/crypto
RUN yarn workspace @sourcebot/db install --frozen-lockfile
RUN yarn workspace @sourcebot/schemas install --frozen-lockfile
RUN yarn workspace @sourcebot/crypto install --frozen-lockfile

# ------ Build Web ------
FROM node-alpine AS web-builder
Expand All @@ -30,6 +32,7 @@ COPY ./packages/web ./packages/web
COPY --from=shared-libs-builder /app/node_modules ./node_modules
COPY --from=shared-libs-builder /app/packages/db ./packages/db
COPY --from=shared-libs-builder /app/packages/schemas ./packages/schemas
COPY --from=shared-libs-builder /app/packages/crypto ./packages/crypto

# Fixes arm64 timeouts
RUN yarn config set registry https://registry.npmjs.org/
Expand Down Expand Up @@ -60,6 +63,7 @@ COPY ./packages/backend ./packages/backend
COPY --from=shared-libs-builder /app/node_modules ./node_modules
COPY --from=shared-libs-builder /app/packages/db ./packages/db
COPY --from=shared-libs-builder /app/packages/schemas ./packages/schemas
COPY --from=shared-libs-builder /app/packages/crypto ./packages/crypto
RUN yarn workspace @sourcebot/backend install --frozen-lockfile
RUN yarn workspace @sourcebot/backend build

Expand Down Expand Up @@ -100,7 +104,7 @@ ENV POSTHOG_PAPIK=$POSTHOG_PAPIK
# ENV SOURCEBOT_TELEMETRY_DISABLED=1

# Configure dependencies
RUN apk add --no-cache git ca-certificates bind-tools tini jansson wget supervisor uuidgen curl perl jq redis postgresql postgresql-contrib
RUN apk add --no-cache git ca-certificates bind-tools tini jansson wget supervisor uuidgen curl perl jq redis postgresql postgresql-contrib openssl

# Configure zoekt
COPY vendor/zoekt/install-ctags-alpine.sh .
Expand Down Expand Up @@ -129,6 +133,7 @@ COPY --from=backend-builder /app/packages/backend ./packages/backend
COPY --from=shared-libs-builder /app/node_modules ./node_modules
COPY --from=shared-libs-builder /app/packages/db ./packages/db
COPY --from=shared-libs-builder /app/packages/schemas ./packages/schemas
COPY --from=shared-libs-builder /app/packages/crypto ./packages/crypto

# Configure the database
RUN mkdir -p /run/postgresql && \
Expand All @@ -143,6 +148,8 @@ RUN chmod +x ./entrypoint.sh

COPY default-config.json .

ENV SOURCEBOT_ENCRYPTION_KEY=""

EXPOSE 3000
ENV PORT=3000
ENV HOSTNAME="0.0.0.0"
Expand Down
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -374,14 +374,20 @@ docker run <b>-v /path/to/my-repo:/repos/my-repo</b> /* additional args */ ghcr.

5. Create a `config.json` file at the repository root. See [Configuring Sourcebot](#configuring-sourcebot) for more information.

6. Start Sourcebot with the command:
6. Create `.env.local` files in the `packages/backend` and `packages/web` directories with the following contents:
```sh
# You can use https://acte.ltd/utils/randomkeygen to generate a key ("Encryption key 256")
SOURCEBOT_ENCRYPTION_KEY="32-byte-secret-key"
```

7. Start Sourcebot with the command:
```sh
yarn dev
```

A `.sourcebot` directory will be created and zoekt will begin indexing the repositories listed in `config.json`.

7. Start searching at `http://localhost:3000`.
8. Start searching at `http://localhost:3000`.

## Telemetry

Expand Down
16 changes: 16 additions & 0 deletions entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,22 @@ if [ ! -d "$DB_DATA_DIR" ]; then
su postgres -c "initdb -D $DB_DATA_DIR"
fi

# Make sure SOURCEBOT_ENCRYPTION_KEY is available to the processes started below.
# When the variable is empty, fall back to a key persisted in
# "$DATA_CACHE_DIR/.secret", generating and persisting one if the file does
# not exist yet.
if [ -z "$SOURCEBOT_ENCRYPTION_KEY" ]; then
    echo -e "\e[31m[Error] SOURCEBOT_ENCRYPTION_KEY is not set.\e[0m"

    if [ -f "$DATA_CACHE_DIR/.secret" ]; then
        echo -e "\e[34m[Info] Loading environment variables from $DATA_CACHE_DIR/.secret\e[0m"
    else
        echo -e "\e[34m[Info] Generating a new encryption key...\e[0m"
        # 24 random bytes encode to 32 base64 characters.
        SOURCEBOT_ENCRYPTION_KEY=$(openssl rand -base64 24)
        # The file stores the key in plain text, so create it under a
        # restrictive umask (owner read/write only). The subshell keeps the
        # umask change from leaking into the rest of the script.
        (
            umask 077
            echo "SOURCEBOT_ENCRYPTION_KEY=\"$SOURCEBOT_ENCRYPTION_KEY\"" >> "$DATA_CACHE_DIR/.secret"
        )
    fi

    # `set -a` exports every variable assigned while the file is sourced, so
    # the key reaches child processes; `set +a` restores the default behaviour.
    set -a
    . "$DATA_CACHE_DIR/.secret"
    set +a
fi

# In order to detect if this is the first run, we create a `.installed` file in
# the cache directory.
FIRST_RUN_FILE="$DATA_CACHE_DIR/.installedv2"
Expand Down
1 change: 1 addition & 0 deletions packages/backend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
"lowdb": "^7.0.1",
"micromatch": "^4.0.8",
"posthog-node": "^4.2.1",
"@sourcebot/crypto": "^0.1.0",
"@sourcebot/db": "^0.1.0",
"@sourcebot/schemas": "^0.1.0",
"simple-git": "^3.27.0",
Expand Down
10 changes: 3 additions & 7 deletions packages/backend/src/connectionManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { ConnectionConfig } from "@sourcebot/schemas/v3/connection.type";
import { createLogger } from "./logger.js";
import os from 'os';
import { Redis } from 'ioredis';
import { getTokenFromConfig, marshalBool } from "./utils.js";
import { marshalBool } from "./utils.js";
import { getGitHubReposFromConfig } from "./github.js";

interface IConnectionManager {
Expand Down Expand Up @@ -70,17 +70,13 @@ export class ConnectionManager implements IConnectionManager {
const repoData: RepoData[] = await (async () => {
switch (config.type) {
case 'github': {
const token = config.token ? getTokenFromConfig(config.token, this.context) : undefined;
const gitHubRepos = await getGitHubReposFromConfig(config, abortController.signal, this.context);
const gitHubRepos = await getGitHubReposFromConfig(config, orgId, this.db, abortController.signal);
const hostUrl = config.url ?? 'https://github.com';
const hostname = config.url ? new URL(config.url).hostname : 'github.com';

return gitHubRepos.map((repo) => {
const repoName = `${hostname}/${repo.full_name}`;
const cloneUrl = new URL(repo.clone_url!);
if (token) {
cloneUrl.username = token;
}

const record: RepoData = {
external_id: repo.id.toString(),
Expand Down
5 changes: 3 additions & 2 deletions packages/backend/src/gitea.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@ import micromatch from 'micromatch';

const logger = createLogger('Gitea');

export const getGiteaReposFromConfig = async (config: GiteaConfig, ctx: AppContext) => {
const token = config.token ? getTokenFromConfig(config.token, ctx) : undefined;
export const getGiteaReposFromConfig = async (config: GiteaConfig, orgId: number, ctx: AppContext) => {
// TODO: pass in DB here to fetch secret properly
const token = config.token ? await getTokenFromConfig(config.token, orgId) : undefined;

const api = giteaApi(config.url ?? 'https://gitea.com', {
token,
Expand Down
5 changes: 3 additions & 2 deletions packages/backend/src/github.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { createLogger } from "./logger.js";
import { AppContext } from "./types.js";
import { getTokenFromConfig, measure } from "./utils.js";
import micromatch from "micromatch";
import { PrismaClient } from "@sourcebot/db";

const logger = createLogger("GitHub");

Expand All @@ -25,8 +26,8 @@ export type OctokitRepository = {
size?: number,
}

export const getGitHubReposFromConfig = async (config: GithubConnectionConfig, signal: AbortSignal, ctx: AppContext) => {
const token = config.token ? getTokenFromConfig(config.token, ctx) : undefined;
export const getGitHubReposFromConfig = async (config: GithubConnectionConfig, orgId: number, db: PrismaClient, signal: AbortSignal) => {
const token = config.token ? await getTokenFromConfig(config.token, orgId, db) : undefined;

const octokit = new Octokit({
auth: token,
Expand Down
5 changes: 3 additions & 2 deletions packages/backend/src/gitlab.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@ import { getTokenFromConfig, measure } from "./utils.js";
const logger = createLogger("GitLab");
export const GITLAB_CLOUD_HOSTNAME = "gitlab.com";

export const getGitLabReposFromConfig = async (config: GitLabConfig, ctx: AppContext) => {
const token = config.token ? getTokenFromConfig(config.token, ctx) : undefined;
export const getGitLabReposFromConfig = async (config: GitLabConfig, orgId: number, ctx: AppContext) => {
// TODO: pass in DB here to fetch secret properly
const token = config.token ? await getTokenFromConfig(config.token, orgId) : undefined;
const api = new Gitlab({
...(config.token ? {
token,
Expand Down
63 changes: 54 additions & 9 deletions packages/backend/src/main.ts
Original file line number Diff line number Diff line change
@@ -1,20 +1,50 @@
import { ConnectionSyncStatus, PrismaClient, Repo, RepoIndexingStatus } from '@sourcebot/db';
import { ConnectionSyncStatus, PrismaClient, Repo, RepoIndexingStatus, RepoToConnection, Connection } from '@sourcebot/db';
import { existsSync } from 'fs';
import { cloneRepository, fetchRepository } from "./git.js";
import { createLogger } from "./logger.js";
import { captureEvent } from "./posthog.js";
import { AppContext } from "./types.js";
import { getRepoPath, measure } from "./utils.js";
import { getRepoPath, getTokenFromConfig, measure } from "./utils.js";
import { indexGitRepository } from "./zoekt.js";
import { DEFAULT_SETTINGS } from './constants.js';
import { Queue, Worker, Job } from 'bullmq';
import { Redis } from 'ioredis';
import * as os from 'os';
import { ConnectionManager } from './connectionManager.js';
import { ConnectionConfig } from '@sourcebot/schemas/v3/connection.type';

const logger = createLogger('main');

const syncGitRepository = async (repo: Repo, ctx: AppContext) => {
type RepoWithConnections = Repo & { connections: (RepoToConnection & { connection: Connection})[] };

/**
 * Picks a token that can be used to clone `repo`.
 *
 * `repo.cloneUrl` no longer embeds the token (for security reasons), so it has
 * to be looked up through the connections that reference the repo. Several
 * connections may reference the same repo, each with its own token; this
 * helper walks them in order and returns the first token that resolves to a
 * non-empty value.
 *
 * TODO: do this better? e.g. try the tokens from all the connections. The
 * first token found may not actually have access to the repo it is
 * referencing, in which case syncing can fail.
 *
 * @param repo Repo together with its connection join records.
 * @param db Prisma client forwarded to `getTokenFromConfig`.
 * @returns The first resolved token, or `undefined` when the repo has no
 *          connections (an error is logged) or none of them yields a token.
 *          Errors thrown by `getTokenFromConfig` are not caught here.
 */
const getTokenForRepo = async (repo: RepoWithConnections, db: PrismaClient) => {
    const links = repo.connections;
    if (links.length === 0) {
        logger.error(`Repo ${repo.id} has no connections`);
        return;
    }

    for (const { connection } of links) {
        const connectionConfig = connection.config as unknown as ConnectionConfig;
        if (!connectionConfig.token) {
            // This connection is unauthenticated; try the next one.
            continue;
        }

        const resolved = await getTokenFromConfig(connectionConfig.token, connection.orgId, db);
        if (resolved) {
            return resolved;
        }
    }

    return undefined;
}

const syncGitRepository = async (repo: RepoWithConnections, ctx: AppContext, db: PrismaClient) => {
let fetchDuration_s: number | undefined = undefined;
let cloneDuration_s: number | undefined = undefined;

Expand All @@ -35,7 +65,15 @@ const syncGitRepository = async (repo: Repo, ctx: AppContext) => {
} else {
logger.info(`Cloning ${repo.id}...`);

const { durationMs } = await measure(() => cloneRepository(repo.cloneUrl, repoPath, metadata, ({ method, stage, progress }) => {
const token = await getTokenForRepo(repo, db);
let cloneUrl = repo.cloneUrl;
if (token) {
const url = new URL(cloneUrl);
url.username = token;
cloneUrl = url.toString();
}

const { durationMs } = await measure(() => cloneRepository(cloneUrl, repoPath, metadata, ({ method, stage, progress }) => {
logger.info(`git.${method} ${stage} stage ${progress}% complete for ${repo.id}`)
}));
cloneDuration_s = durationMs / 1000;
Expand Down Expand Up @@ -92,13 +130,13 @@ export const main = async (db: PrismaClient, context: AppContext) => {

const connectionManager = new ConnectionManager(db, DEFAULT_SETTINGS, redis, context);
setInterval(async () => {
const configs = await db.connection.findMany({
const connections = await db.connection.findMany({
where: {
syncStatus: ConnectionSyncStatus.SYNC_NEEDED,
}
});
for (const config of configs) {
await connectionManager.scheduleConnectionSync(config);
for (const connection of connections) {
await connectionManager.scheduleConnectionSync(connection);
}
}, DEFAULT_SETTINGS.resyncConnectionPollingIntervalMs);

Expand All @@ -111,13 +149,13 @@ export const main = async (db: PrismaClient, context: AppContext) => {
const numWorkers = numCores * DEFAULT_SETTINGS.indexConcurrencyMultiple;
logger.info(`Detected ${numCores} cores. Setting repo index max concurrency to ${numWorkers}`);
const worker = new Worker('indexQueue', async (job: Job) => {
const repo = job.data as Repo;
const repo = job.data as RepoWithConnections;

let indexDuration_s: number | undefined;
let fetchDuration_s: number | undefined;
let cloneDuration_s: number | undefined;

const stats = await syncGitRepository(repo, context);
const stats = await syncGitRepository(repo, context, db);
indexDuration_s = stats.indexDuration_s;
fetchDuration_s = stats.fetchDuration_s;
cloneDuration_s = stats.cloneDuration_s;
Expand Down Expand Up @@ -171,6 +209,13 @@ export const main = async (db: PrismaClient, context: AppContext) => {
{ indexedAt: { lt: thresholdDate } },
{ repoIndexingStatus: RepoIndexingStatus.NEW }
]
},
include: {
connections: {
include: {
connection: true
}
}
}
});
addReposToQueue(db, indexQueue, repos);
Expand Down
38 changes: 32 additions & 6 deletions packages/backend/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ import { Logger } from "winston";
import { AppContext, Repository } from "./types.js";
import path from 'path';
import micromatch from "micromatch";
import { Repo } from "@sourcebot/db";
import { PrismaClient, Repo } from "@sourcebot/db";
import { decrypt } from "@sourcebot/crypto";
import { Token } from "@sourcebot/schemas/v3/shared.type";

export const measure = async <T>(cb : () => Promise<T>) => {
const start = Date.now();
Expand Down Expand Up @@ -86,15 +88,39 @@ export const excludeReposByTopic = <T extends Repository>(repos: T[], excludedRe
});
}

export const getTokenFromConfig = (token: string | { env: string }, ctx: AppContext) => {
export const getTokenFromConfig = async (token: Token, orgId: number, db?: PrismaClient) => {
if (typeof token === 'string') {
return token;
}
const tokenValue = process.env[token.env];
if (!tokenValue) {
throw new Error(`The environment variable '${token.env}' was referenced in ${ctx.configPath}, but was not set.`);
if ('env' in token) {
const tokenValue = process.env[token.env];
if (!tokenValue) {
throw new Error(`The environment variable '${token.env}' was referenced in the config but was not set.`);
}
return tokenValue;
} else if ('secret' in token) {
if (!db) {
throw new Error(`Database connection required to retrieve secret`);
}

const secretKey = token.secret;
const secret = await db.secret.findUnique({
where: {
orgId_key: {
key: secretKey,
orgId
}
}
});

if (!secret) {
throw new Error(`Secret with key ${secretKey} not found for org ${orgId}`);
}

const decryptedSecret = decrypt(secret.iv, secret.encryptedValue);
return decryptedSecret;
}
return tokenValue;
throw new Error(`Invalid token configuration in config`);
}

export const isRemotePath = (path: string) => {
Expand Down
1 change: 1 addition & 0 deletions packages/crypto/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.env.local
16 changes: 16 additions & 0 deletions packages/crypto/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"name": "@sourcebot/crypto",
"main": "dist/index.js",
"version": "0.1.0",
"scripts": {
"build": "tsc",
"postinstall": "yarn build"
},
"dependencies": {
"dotenv": "^16.4.5"
},
"devDependencies": {
"@types/node": "^22.7.5",
"typescript": "^5.7.3"
}
}
17 changes: 17 additions & 0 deletions packages/crypto/src/environment.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import dotenv from 'dotenv';

/**
 * Resolves the value of an environment variable.
 *
 * @param env Raw value, typically `process.env.SOME_NAME`.
 * @param defaultValue Fallback used when `env` is unset or empty.
 * @param required When true, throws if neither `env` nor `defaultValue`
 *                 provides a non-empty value.
 * @returns `env` when it is a non-empty string, otherwise `defaultValue`;
 *          when no default is given, `env` is returned unchanged.
 * @throws Error when `required` is set and both `env` and `defaultValue` are
 *         empty or undefined.
 */
export const getEnv = (env: string | undefined, defaultValue?: string, required?: boolean) => {
    if (required && !env && !defaultValue) {
        throw new Error(`Missing required environment variable`);
    }

    // An empty string (e.g. Docker's `ENV NAME=""`) counts as "not set" in the
    // guard above, so it must also fall through to the default here. A plain
    // `env ?? defaultValue` would return "" and ignore the default.
    const isSet = env !== undefined && env !== '';
    return isSet ? env : (defaultValue ?? env);
}

// Load variables from `.env.local` before any of them are read below.
// NOTE(review): the path is relative, so it presumably resolves against the
// current working directory of the process — confirm for each consumer.
// `override: true` is passed so that values from the file replace variables
// already present in the environment (see the dotenv `override` option).
dotenv.config({
path: './.env.local',
override: true
});

// Encryption key read from the environment once, at module load.
// It is requested with `required = true` and no default, so `getEnv` throws
// when the variable is missing or empty; the trailing `!` relies on that check.
// @note: You can use https://generate-random.org/encryption-key-generator to create a new 32-byte key
export const SOURCEBOT_ENCRYPTION_KEY = getEnv(process.env.SOURCEBOT_ENCRYPTION_KEY, undefined, true)!;
Loading