Athena - Part 4 - Analytics API
Setting up the analytics API to query the data



Back to Part 3 - Data Transformation
Part 4 - Athena Analytics API
or how to use the aws sdk version 3 to query Athena.
There are a couple of ways to do this. Firstly, are the results of the query going to returned to the user or is it part of a process that will be used by another service?
If the results are going to be returned to the user then the API needs to be able to handle the query and return the results. If the results are going to be used by another service then the API just needs to be able to handle the query and return the query id.
For this example we're going to return the results to the user.
Setting up the API
Before we write the code we need to set up the API. This is done using terraform:
resource "aws_api_gateway_rest_api" "analytics_api" {
name = "analytics_api"
description = "Analytics API"
}
resource "aws_api_gateway_resource" "analytics_api" {
rest_api_id = aws_api_gateway_rest_api.analytics.id
parent_id = aws_api_gateway_rest_api.analytics.root_resource_id
path_part = "analytics"
}
resource "aws_api_gateway_method" "analytics_api" {
rest_api_id = aws_api_gateway_rest_api.analytics_api.id
resource_id = aws_api_gateway_resource.analytics_api.id
http_method = "GET"
authorization = "NONE"
api_key_required = true // protected by an API key
request_validator_id = aws_api_gateway_request_validator.analytics_api.id
}
resource "aws_api_gateway_integration" "analytics_api" {
rest_api_id = aws_api_gateway_rest_api.analytics_api.id
resource_id = aws_api_gateway_resource.analytics_api.id
http_method = aws_api_gateway_method.analytics_api.http_method
integration_http_method = "POST"
type = "AWS_PROXY"
uri = aws_lambda_function.analytics_api.invoke_arn
}
resource "aws_api_gateway_method" "analytics_api_options" {
rest_api_id = aws_api_gateway_rest_api.analytics_api.id
resource_id = aws_api_gateway_resource.analytics_api.id
http_method = "OPTIONS"
authorization = "NONE"
request_validator_id = aws_api_gateway_request_validator.analytics_api.id
}
resource "aws_api_gateway_integration" "analytics_api_options" {
rest_api_id = aws_api_gateway_rest_api.analytics_api.id
resource_id = aws_api_gateway_resource.analytics_api.id
http_method = aws_api_gateway_method.analytics_api_options.http_method
type = "MOCK"
request_templates = {
"application/json" = jsonencode(
{
statusCode = 200
}
)
}
}
resource "aws_api_gateway_method_response" "analytics_api_options" {
rest_api_id = aws_api_gateway_rest_api.analytics_api.id
resource_id = aws_api_gateway_resource.analytics_api.id
http_method = aws_api_gateway_method.analytics_api_options.http_method
status_code = 200
response_parameters = {
"method.response.header.Access-Control-Allow-Headers" = true,
"method.response.header.Access-Control-Allow-Methods" = true,
"method.response.header.Access-Control-Allow-Origin" = true
}
}
resource "aws_api_gateway_integration_response" "analytics_api_options" {
rest_api_id = aws_api_gateway_rest_api.analytics_api.id
resource_id = aws_api_gateway_resource.analytics_api.id
http_method = aws_api_gateway_method_response.analytics_api_options.http_method
status_code = aws_api_gateway_method_response.analytics_api_options.status_code
response_parameters = {
"method.response.header.Access-Control-Allow-Headers" = "'Content-Type,X-Amz-Date,Authorization,X-Api-Key,X-Amz-Security-Token,X-Amz-User-Agent,X-Amzn-Trace-Id'",
"method.response.header.Access-Control-Allow-Methods" = "'OPTIONS,GET'",
"method.response.header.Access-Control-Allow-Origin" = "'${var.cors_origin}'"
}
response_templates = {
"application/json" = ""
}
}
resource "aws_api_gateway_method_settings" "analytics_api" {
rest_api_id = aws_api_gateway_rest_api.analytics_api.id
stage_name = aws_api_gateway_stage.analytics_api.stage_name
method_path = "${aws_api_gateway_resource.analytics_api.path}/*/GET"
settings {
metrics_enabled = true
logging_level = "INFO"
}
}
resource "aws_api_gateway_request_validator" "analytics_api" {
name = "${var.namespace}-analytics-api-${var.environment}"
rest_api_id = aws_api_gateway_rest_api.analytics_api.id
validate_request_parameters = true
}
resource "aws_api_gateway_deployment" "analytics_api" {
rest_api_id = aws_api_gateway_rest_api.analytics_api.id
triggers = {
redeployment = sha1(join(",", [
jsonencode(aws_api_gateway_rest_api.analytics_api),
jsonencode(aws_api_gateway_resource.analytics_api),
jsonencode(aws_api_gateway_resource.analytics_api_measure),
jsonencode(aws_api_gateway_resource.analytics_api_period),
jsonencode(aws_api_gateway_resource.analytics_api_start),
jsonencode(aws_api_gateway_method.analytics_api),
jsonencode(aws_api_gateway_integration.analytics_api),
jsonencode(aws_api_gateway_method.analytics_api_options),
jsonencode(aws_api_gateway_integration.analytics_api_options),
jsonencode(aws_api_gateway_method.analytics_api_options),
jsonencode(aws_api_gateway_method_response.analytics_api_options),
jsonencode(aws_api_gateway_integration_response.analytics_api_options)
]))
}
lifecycle {
create_before_destroy = true
}
}
resource "aws_api_gateway_stage" "analytics_api" {
deployment_id = aws_api_gateway_deployment.analytics_api.id
rest_api_id = aws_api_gateway_rest_api.analytics_api.id
stage_name = "default"
}
resource "aws_api_gateway_usage_plan" "analytics_api" {
name = "${var.namespace}-analytics-api-${var.environment}"
api_stages {
api_id = aws_api_gateway_rest_api.analytics_api.id
stage = aws_api_gateway_stage.analytics_api.stage_name
}
}
resource "aws_api_gateway_api_key" "analytics_api" {
name = "${var.namespace}-analytics-api-${var.environment}"
}
resource "aws_api_gateway_usage_plan_key" "analytics_api" {
key_id = aws_api_gateway_api_key.analytics_api.id
key_type = "API_KEY"
usage_plan_id = aws_api_gateway_usage_plan.analytics_api.id
}
Whew! That's a lot of code. I could break it down, but that is the bare minimum required to deploy a secure api in aws using terraform.
The lambda API handler
To return results directly from Athena we need to split the code into 3 areas:
- start the query
- wait for the query to finish
- process and return the results
Start the query
const client = new AthenaClient({ region: 'eu-west-2' })
// This is the simple part
const command = new StartQueryExecutionCommand({
QueryString: 'select * from analytics limit 10',
})
const result = await client.send(command)
The above kicks off the Athena query and is an asynchronous operation. We need to wait for the query to finish before we can return the results. We can check the status of the query using the 'getQueryExecution' command. Here's a function that manages the response from the 'getQueryExecution' command:
const waitForQueryResults = async (id: string) => {
let response
let isQueryStillRunning = true
while (isQueryStillRunning) {
isQueryStillRunning = false
const command = new GetQueryExecutionCommand({
QueryExecutionId: id,
})
response = await client.send(command)
const state = response.QueryExecution?.Status?.State
if (state === 'QUEUED' || state === 'RUNNING') {
isQueryStillRunning = true
}
if (state === 'FAILED') {
logger.error({ message: response.QueryExecution?.Status })
throw Error('query failed')
}
}
logger.info({ message: 'results for query id returned', queryId: id })
}
//invoke like this:
await waitForQueryResults(result.QueryExecutionId)
Process and return the results
Once the query has finished we can return the results. We can use the getQueryResults command to get the results:
const command = new GetQueryResultsCommand({
QueryExecutionId: id,
})
const queryResults = await client.send(command)
Conclusion
So there we have it! A full pipeline for recording analytics data, storing it cheaply and querying it quickly. We can now use the data to power our dashboards and make decisions based on the data. If you have any questions or comments please get in touch!