fix: datalake fixes (#8251)

Alexander Onnikov 2025-03-17 22:11:02 +07:00 committed by GitHub
parent 2b03c56939
commit e6acd6082b
7 changed files with 75 additions and 41 deletions

.vscode/launch.json vendored

@@ -745,6 +745,25 @@
       "sourceMaps": true,
       "cwd": "${workspaceRoot}/services/telegram-bot/pod-telegram-bot"
     },
+    {
+      "name": "Debug datalake",
+      "type": "node",
+      "request": "launch",
+      "args": ["src/index.ts"],
+      "env": {
+        "PORT": "4030",
+        "SECRET": "secret",
+        "DB_URL": "",
+        "BUCKETS": "",
+        "ACCOUNTS_URL": "http://localhost:3000",
+        "STATS_URL": "http://huly.local:4900",
+        "STREAM_URL": "http://huly.local:1080/recording"
+      },
+      "runtimeArgs": ["--nolazy", "-r", "ts-node/register"],
+      "runtimeVersion": "20",
+      "sourceMaps": true,
+      "cwd": "${workspaceRoot}/services/datalake/pod-datalake"
+    },
     {
       "type": "chrome",
       "name": "Attach to Browser",


@@ -369,9 +369,7 @@ export class DatalakeClient {
     })
   }
 
-  // R2
-  async getR2UploadParams (ctx: MeasureContext, workspace: WorkspaceUuid): Promise<R2UploadParams> {
+  async getS3UploadParams (ctx: MeasureContext, workspace: WorkspaceUuid): Promise<R2UploadParams> {
     const path = `/upload/s3/${workspace}`
     const url = concatLink(this.endpoint, path)
@@ -380,7 +378,7 @@ export class DatalakeClient {
     return json
   }
 
-  async uploadFromR2 (
+  async createFromS3 (
     ctx: MeasureContext,
     workspace: WorkspaceUuid,
     objectName: string,


@@ -123,7 +123,7 @@ export async function handleBlobHead (
   res.setHeader('Last-Modified', new Date(head.lastModified).toUTCString())
   res.setHeader('ETag', head.etag)
 
-  res.status(204).send()
+  res.status(200).send()
 }
 
 export async function handleBlobDelete (


@@ -47,7 +47,7 @@ export async function handleS3CreateBlob (
     res.status(200).send()
   } catch (err: any) {
     const error = err instanceof Error ? err.message : String(err)
-    console.error('failed to create blob', { workspace, name, error })
+    ctx.error('failed to create blob', { workspace, name, error })
     res.status(500).send()
   }
 }


@@ -41,47 +41,63 @@ class S3BucketImpl implements S3Bucket {
   ) {}
 
   async head (ctx: MeasureContext, key: string): Promise<S3Object | null> {
-    const result = await ctx.with('s3.headObject', {}, () => this.client.headObject({ Bucket: this.bucket, Key: key }))
-    return {
-      key,
-      etag: result.ETag ?? '',
-      size: result.ContentLength ?? 0,
-      contentType: result.ContentType ?? '',
-      lastModified: result.LastModified?.getTime() ?? 0,
-      cacheControl: result.CacheControl
+    try {
+      const result = await ctx.with('s3.headObject', {}, () =>
+        this.client.headObject({ Bucket: this.bucket, Key: key })
+      )
+      return {
+        key,
+        etag: result.ETag ?? '',
+        size: result.ContentLength ?? 0,
+        contentType: result.ContentType ?? '',
+        lastModified: result.LastModified?.getTime() ?? 0,
+        cacheControl: result.CacheControl
+      }
+    } catch (err: any) {
+      if (err?.$metadata?.httpStatusCode !== 404) {
+        ctx.warn('no object found', { error: err, key })
+      }
+      return null
     }
   }
 
   async get (ctx: MeasureContext, key: string, options?: S3GetOptions): Promise<S3ObjectBody | null> {
-    const command = { Bucket: this.bucket, Key: key, Range: options?.range }
-    const result = await ctx.with('s3.getObject', {}, () => this.client.getObject(command))
-    if (result.Body === undefined) {
-      return null
-    }
-    const stream = result.Body?.transformToWebStream()
-    if (stream === undefined) {
-      return null
-    }
-    const lastModified =
-      result.Metadata?.['last-modified'] !== undefined
-        ? new Date(result.Metadata['last-modified']).getTime()
-        : result.LastModified?.getTime()
-    return {
-      key,
-      body: Readable.fromWeb(stream as ReadableStream<any>),
-      range: result.ContentRange,
-      etag: result.ETag ?? '',
-      size: result.ContentLength ?? 0,
-      contentType: result.ContentType ?? '',
-      lastModified: lastModified ?? 0,
-      cacheControl: result.CacheControl
+    try {
+      const command = { Bucket: this.bucket, Key: key, Range: options?.range }
+      const result = await ctx.with('s3.getObject', {}, () => this.client.getObject(command))
+      if (result.Body === undefined) {
+        return null
+      }
+      const stream = result.Body?.transformToWebStream()
+      if (stream === undefined) {
+        return null
+      }
+      const lastModified =
+        result.Metadata?.['last-modified'] !== undefined
+          ? new Date(result.Metadata['last-modified']).getTime()
+          : result.LastModified?.getTime()
+      return {
+        key,
+        body: Readable.fromWeb(stream as ReadableStream<any>),
+        range: result.ContentRange,
+        etag: result.ETag ?? '',
+        size: result.ContentLength ?? 0,
+        contentType: result.ContentType ?? '',
+        lastModified: lastModified ?? 0,
+        cacheControl: result.CacheControl
+      }
+    } catch (err: any) {
+      if (err?.$metadata?.httpStatusCode !== 404) {
+        ctx.warn('no object found', { error: err, key })
+      }
+      return null
     }
   }
 
   async put (


@@ -311,7 +311,8 @@ const startRecord = async (
       accessKey,
       region,
       secret,
-      bucket
+      bucket,
+      forcePathStyle: true
     })
   }
 })
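
The recording config above now passes forcePathStyle: true along with the bucket. As a minimal sketch of what that flag controls, assuming an AWS SDK v3 style S3Client (the commit does not show how the recording client is actually constructed, so the endpoint and credentials below are placeholders):

    import { S3Client } from '@aws-sdk/client-s3'

    // Sketch only: placeholder endpoint and credentials, not values from this repository.
    const client = new S3Client({
      region: 'auto',
      endpoint: 'http://huly.local:9000',
      credentials: { accessKeyId: 'accessKey', secretAccessKey: 'secret' },
      // Path-style addressing requests http://huly.local:9000/<bucket>/<key>
      // instead of the virtual-hosted form http://<bucket>.huly.local:9000/<key>,
      // which single-host S3-compatible stores (for example MinIO) typically require.
      forcePathStyle: true
    })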


@@ -110,7 +110,7 @@ async function getS3UploadParamsDatalake (
 ): Promise<S3UploadParams> {
   const token = generateToken(systemAccountUuid, undefined, { service: 'love' })
   const client = createDatalakeClient(config, token)
-  const { bucket } = await client.getR2UploadParams(ctx, workspaceId)
+  const { bucket } = await client.getS3UploadParams(ctx, workspaceId)
 
   const endpoint = s3config.endpoint
   const accessKey = s3config.accessKey
@@ -156,7 +156,7 @@ async function saveFileToDatalake (
   const prefix = rootPrefix(s3config, wsIds.uuid)
   const uuid = stripPrefix(prefix, filename)
 
-  await client.uploadFromR2(ctx, wsIds.uuid, uuid, { filename: uuid })
+  await client.createFromS3(ctx, wsIds.uuid, uuid, { filename: uuid })
 
   return await storageAdapter.stat(ctx, wsIds, uuid)
 }