6 Query Fee Proportions
Summarize the query volume data
# Calculate query volume over the last 48 hours
# Handle case where query_volume might be empty
if (nrow(query_volume) > 0) {
max(query_volume$date, na.rm = TRUE)
latest_date <-# Use base R approach for date subtraction to avoid dependency
as.Date(latest_date) - 2
start_date_48h <-else {
} # If no query data, use recent dates for consistency
Sys.Date()
latest_date <- latest_date - 2
start_date_48h <-
}
query_volume %>%
query_volume_48h <- filter(date >= start_date_48h) %>%
# rename subgraph_id to deployment (to match our other data sources)
rename(deployment = subgraph_deployment_ipfs_hash) %>%
# group by subgraph deployment
group_by(deployment) %>%
# sum the number of queries by deployment over the 48h period
# Use na.rm=TRUE just in case query_count has NAs
summarize(sum_queries_48h = sum(query_count, na.rm = TRUE)) %>%
# sort the data by deployments with largest number of queries first
arrange(desc(sum_queries_48h))
# Optional: Keep historical calculation if needed elsewhere
# filtered_query_volume = query_volume %>% ... (original calculation) ...
## # A tibble: 0 × 2
## # ℹ 2 variables: deployment <chr>, sum_queries_48h <dbl>
Join results back to the list of synced subgraphs:
# Join the 48h query volume results back to the synced list
# Ensure previous joins haven't created duplicate deployment columns
%<>%
synced_subgraphs select(-any_of(c("sum_queries", "queries_per_indexer"))) %>% # Remove old cols if they exist
left_join(query_volume_48h, by='deployment')
# Optional: Join historical volume if needed elsewhere
# synced_subgraphs %<>% left_join(filtered_query_volume, by='deployment', suffix = c("", "_hist"))
## # A tibble: 350 × 6
## deployment unique_indexers signalled_tokens total_allocated_tokens
## <chr> <int> <dbl> <dbl>
## 1 QmZ3Cuso8BeNYjPdEG3B… NA NA NA
## 2 QmSrPAGCzBapQXyRtV3P… NA NA NA
## 3 QmefHBEBk7sFi9r4SFT3… NA NA NA
## 4 QmUzRg2HHMpbgf6Q4VHK… NA NA NA
## 5 QmVyhWS9Q32kcr3si1JN… NA NA NA
## 6 QmXvdb33USYAq4XfPyAY… 1 0 1
## 7 QmXLQ8QGxn8veGB9xUEm… NA NA NA
## 8 QmNR4EtKBGFrfdBJQhn7… NA NA NA
## 9 QmQuCY9bwTF13ZkPdpgy… NA NA NA
## 10 QmZjMSBvMuW3H1kjmguj… NA NA NA
## # ℹ 340 more rows
## # ℹ 2 more variables: rewards_proportion <dbl>, sum_queries_48h <dbl>
Calculate the number of queries available per indexer (over 48h):
%<>%
synced_subgraphs # Calculate queries per indexer based on 48h data
# Ensure unique_indexers is not 0 or NA to avoid division errors
# Replace NA sum_queries_48h with 0
mutate(sum_queries_48h = ifelse(is.na(sum_queries_48h), 0, sum_queries_48h)) %>%
mutate(queries_per_indexer_48h = ifelse(is.na(unique_indexers) | unique_indexers == 0, 0, sum_queries_48h / unique_indexers))
# Optional: Calculate historical queries per indexer if needed
# mutate(queries_per_indexer_hist = ifelse(is.na(unique_indexers) | unique_indexers == 0, 0, sum_queries_hist / unique_indexers))
## # A tibble: 350 × 7
## deployment unique_indexers signalled_tokens total_allocated_tokens
## <chr> <int> <dbl> <dbl>
## 1 QmZ3Cuso8BeNYjPdEG3B… NA NA NA
## 2 QmSrPAGCzBapQXyRtV3P… NA NA NA
## 3 QmefHBEBk7sFi9r4SFT3… NA NA NA
## 4 QmUzRg2HHMpbgf6Q4VHK… NA NA NA
## 5 QmVyhWS9Q32kcr3si1JN… NA NA NA
## 6 QmXvdb33USYAq4XfPyAY… 1 0 1
## 7 QmXLQ8QGxn8veGB9xUEm… NA NA NA
## 8 QmNR4EtKBGFrfdBJQhn7… NA NA NA
## 9 QmQuCY9bwTF13ZkPdpgy… NA NA NA
## 10 QmZjMSBvMuW3H1kjmguj… NA NA NA
## # ℹ 340 more rows
## # ℹ 3 more variables: rewards_proportion <dbl>, sum_queries_48h <dbl>,
## # queries_per_indexer_48h <dbl>
save.image('/root/github/indexer_analytics_tutorial/data/chapters_snapshots/07-query_fee_proportions.RData')