MVP, working sentry scraper

This commit is contained in:
Mike Conrad
2025-05-18 20:44:42 -04:00
parent a34b0899ce
commit 8ea49772df
29 changed files with 724 additions and 481 deletions

View File

@ -0,0 +1,80 @@
import Replay from '#models/replay'
import env from '#start/env'
import type { HttpContext } from '@adonisjs/core/http'
// Sentry API token; sent as the Bearer credential on every request made by fetchBatch.
const SENTRY_TOKEN = env.get('SENTRY_TOKEN')
// Sentry organization identifier; interpolated into the replays endpoint URL.
const SENTRY_ORG = env.get('SENTRY_ORG')
// Running count of rows written during the current scrape. Reset at the start of
// ReplaysController.index and incremented by fetchBatch after each page.
// NOTE(review): this is module-level mutable state, so overlapping requests would
// share (and reset) the same counter — confirm whether concurrent scrapes can happen.
let recordsUpdated = 0
/**
 * HTTP controller that triggers a scrape of Sentry replays into the local
 * `Replay` table.
 */
export default class ReplaysController {
  /**
   * Fetches replays from the Sentry API for the requested time window, stores
   * them, and responds with the number of records written.
   *
   * Query string parameters (read via `request.qs()`):
   * - `statsPeriod`: relative window; takes precedence when present.
   * - `start` and `end`: absolute window; used only when both are present and
   *   `statsPeriod` is not.
   * When neither form is supplied the window defaults to `statsPeriod=24h`.
   *
   * @returns the JSON response carrying whatever `fetchBatch` resolves to.
   */
  async index({ request, response }: HttpContext) {
    const { statsPeriod, start, end } = request.qs()
    recordsUpdated = 0

    // Build the query with URLSearchParams so caller-supplied values are
    // percent-encoded and cannot inject extra parameters into the Sentry URL.
    const params = new URLSearchParams()
    if (statsPeriod) {
      params.set('statsPeriod', String(statsPeriod))
    } else if (start && end) {
      params.set('start', String(start))
      params.set('end', String(end))
    } else {
      // Default in case no window is provided.
      params.set('statsPeriod', '24h')
    }

    const replays = await fetchBatch(
      `https://sentry.io/api/0/organizations/${SENTRY_ORG}/replays/?${params.toString()}`
    )
    return response.json(replays)
  }
}
/**
 * Downloads replays from Sentry starting at `url`, following the `next` link
 * of each response until Sentry reports no further results, and upserts every
 * page into the `Replay` table keyed on `id`.
 *
 * Each record is reduced to `Replay.allowedFields` before being stored, and
 * the module-level `recordsUpdated` counter is incremented by the number of
 * rows written for each page.
 *
 * @param url - Fully qualified Sentry replays endpoint URL for the first page.
 * @returns an object holding the current value of the `recordsUpdated` counter.
 * @throws Error when Sentry answers with a non-2xx status or a body that has
 *   no `data` array.
 */
async function fetchBatch(url: string) {
  const options: RequestInit = {
    headers: {
      Authorization: `Bearer ${SENTRY_TOKEN}`,
    },
  }

  // Walk the pages in a loop rather than recursing, so the call depth does not
  // grow with the number of pages.
  let nextUrl: string | undefined = url
  while (nextUrl !== undefined) {
    const res = await fetch(nextUrl, options)
    if (!res.ok) {
      throw new Error(`Sentry request failed with status ${res.status} for ${nextUrl}`)
    }

    const body: unknown = await res.json()
    const data =
      typeof body === 'object' && body !== null && 'data' in body
        ? (body as { data: unknown }).data
        : undefined
    if (!Array.isArray(data)) {
      throw new Error(`Unexpected Sentry response shape for ${nextUrl}: missing "data" array`)
    }

    const cleanedData = (data as Record<string, any>[]).map((record) =>
      sanitizeInput(record, Replay.allowedFields)
    )
    const updated = await Replay.updateOrCreateMany('id', cleanedData)
    recordsUpdated = recordsUpdated + updated.length

    // A response without a Link header is treated as the last page.
    const linkHeader = res.headers.get('link')
    const pagination = (linkHeader ? parseSentryLinkHeader(linkHeader) : {}) as {
      next?: string
      hasNextResults?: boolean
    }

    if (pagination.hasNextResults === true && pagination.next) {
      console.log('fetching', pagination.next)
      nextUrl = pagination.next
    } else {
      nextUrl = undefined
    }
  }

  console.log('no more results')
  return { recordsUpdated }
}
/** Pagination details extracted from a Sentry `Link` response header. */
interface SentryPagination {
  /** URL of the previous page, when the header carries a `rel="previous"` entry. */
  previous?: string
  /** Whether the `previous` entry is flagged `results="true"`. */
  hasPreviousResults?: boolean
  /** URL of the next page, when the header carries a `rel="next"` entry. */
  next?: string
  /** Whether the `next` entry is flagged `results="true"`. */
  hasNextResults?: boolean
}

/**
 * Parses a Sentry `Link` header into its previous/next page URLs and their
 * `results` flags.
 *
 * Only entries of the form `<url>; rel="..."; results="..."; cursor="..."` are
 * recognised; anything else is ignored, as are `rel` values other than
 * `previous` and `next`.
 *
 * @param header - Raw header value. A missing or empty header yields an empty
 *   result instead of throwing.
 * @returns the fields that could be extracted; absent entries are left unset.
 */
function parseSentryLinkHeader(header: string | null | undefined): SentryPagination {
  const result: SentryPagination = {}
  if (!header) return result

  // Scan the whole header with a global pattern instead of splitting on ",",
  // so a URL that itself contains a comma is still matched as one entry.
  const linkPattern = /<([^>]+)>;\s*rel="([^"]+)";\s*results="([^"]+)";\s*cursor="([^"]+)"/g
  for (const match of header.matchAll(linkPattern)) {
    const [, url, rel, results] = match
    if (rel === 'previous') {
      result.previous = url
      result.hasPreviousResults = results === 'true'
    } else if (rel === 'next') {
      result.next = url
      result.hasNextResults = results === 'true'
    }
  }
  return result
}
/**
 * Builds a copy of `data` restricted to the keys listed in `allowedFields`.
 *
 * Keys are visited in `allowedFields` order; a key is copied only when the
 * `in` operator finds it on `data`, so allowed keys absent from `data` are
 * simply omitted from the result.
 *
 * @param data - Source record to pick values from.
 * @param allowedFields - Names of the properties to keep.
 * @returns a new object holding only the permitted properties.
 */
function sanitizeInput(data: Record<string, any>, allowedFields: string[]) {
  const picked: Record<string, any> = {}
  for (const field of allowedFields) {
    if (!(field in data)) continue
    picked[field] = data[field]
  }
  return picked
}

View File

@ -22,4 +22,4 @@ export default class AuthMiddleware {
await ctx.auth.authenticateUsing(options.guards, { loginRoute: this.redirectTo })
return next()
}
}
}

View File

@ -28,4 +28,4 @@ export default class GuestMiddleware {
return next()
}
}
}

View File

@ -8,12 +8,9 @@ import type { NextFn } from '@adonisjs/core/types/http'
* The request continues as usual, even when the user is not logged-in.
*/
export default class SilentAuthMiddleware {
async handle(
ctx: HttpContext,
next: NextFn,
) {
async handle(ctx: HttpContext, next: NextFn) {
await ctx.auth.check()
return next()
}
}
}

172
app/models/replay.ts Normal file
View File

@ -0,0 +1,172 @@
import { DateTime } from 'luxon'
import { BaseModel, column } from '@adonisjs/lucid/orm'
/**
 * `prepare` hook shared by the columns stored as JSON text: serializes the
 * value with `JSON.stringify` before it is written. Per the original inline
 * comments, Sentry delivers these values as plain arrays.
 */
const toJson = (value: unknown) => JSON.stringify(value)

/**
 * Lucid model for a Sentry session replay. Property names mirror the
 * snake_case field names listed in `allowedFields`.
 */
export default class Replay extends BaseModel {
  @column({ isPrimary: true })
  declare id: string

  @column()
  declare project_id: string

  @column({ prepare: toJson })
  declare trace_ids: string[]

  @column({ prepare: toJson })
  declare error_ids: string[]

  @column()
  declare environment: string | null

  @column({ prepare: toJson })
  declare tags: any

  @column()
  declare user: any

  @column()
  declare sdk: any

  @column()
  declare os: any

  @column()
  declare browser: any

  @column()
  declare device: any

  @column()
  declare ota_updates: any

  @column()
  declare is_archived: boolean | null

  @column({ prepare: toJson })
  declare urls: any

  @column({ prepare: toJson })
  declare clicks: any

  @column()
  declare count_dead_clicks: number | null

  @column()
  declare count_rage_clicks: number | null

  @column()
  declare count_errors: number | null

  @column()
  declare duration: number | null

  @column.dateTime()
  declare finished_at: DateTime | null

  @column.dateTime({ serializeAs: 'started_at' })
  declare started_at: DateTime | null

  @column()
  declare activity: number | null

  @column()
  declare count_urls: number | null

  @column()
  declare replay_type: string

  @column()
  declare count_segments: number | null

  @column()
  declare platform: string | null

  @column({ prepare: toJson })
  declare releases: any

  @column()
  declare dist: string | null

  @column()
  declare count_warnings: number | null

  @column()
  declare count_infos: number | null

  @column()
  declare has_viewed: boolean

  @column.dateTime({ autoCreate: true })
  declare created_at: DateTime

  @column.dateTime({ autoCreate: true, autoUpdate: true })
  declare updated_at: DateTime

  /**
   * Names of the fields that may be copied from an incoming record onto the
   * model. The timestamp columns `created_at` and `updated_at` are not listed.
   */
  static allowedFields = [
    'id',
    'project_id',
    'trace_ids',
    'error_ids',
    'environment',
    'tags',
    'user',
    'sdk',
    'os',
    'browser',
    'device',
    'ota_updates',
    'is_archived',
    'urls',
    'clicks',
    'count_dead_clicks',
    'count_rage_clicks',
    'count_errors',
    'duration',
    'finished_at',
    'started_at',
    'activity',
    'count_urls',
    'replay_type',
    'count_segments',
    'platform',
    'releases',
    'dist',
    'count_warnings',
    'count_infos',
    'has_viewed',
  ]
}

View File

@ -27,4 +27,4 @@ export default class User extends compose(BaseModel, AuthFinder) {
@column.dateTime({ autoCreate: true, autoUpdate: true })
declare updatedAt: DateTime | null
}
}