15 Query Fee Proportions
Summarize the query volume data
# Calculate query volume over the last 48 hours
# Ensure lubridate is loaded for days() or use base R equivalent
# library(lubridate)
max(query_volume$date)
latest_date <-# Use base R approach for date subtraction to avoid dependency
as.Date(latest_date) - 2
start_date_48h <-
query_volume %>%
query_volume_48h <- filter(date >= start_date_48h) %>%
# rename subgraph_id to deployment (to match our other data sources)
rename(deployment = subgraph_deployment_ipfs_hash) %>%
# group by subgraph deployment
group_by(deployment) %>%
# sum the number of queries by deployment over the 48h period
# Use na.rm=TRUE just in case query_count has NAs
summarize(sum_queries_48h = sum(query_count, na.rm = TRUE)) %>%
# sort the data by deployments with largest number of queries first
arrange(desc(sum_queries_48h))
# Optional: Keep historical calculation if needed elsewhere
# filtered_query_volume = query_volume %>% ... (original calculation) ...
## # A tibble: 2,677 × 2
## deployment sum_queries_48h
## <chr> <int>
## 1 QmbeQwnRSX4Fs9Bo3LZCAvkb4psdmcS8AqKv78Tcq6ohED 28471408
## 2 QmdKXcBUHR3UyURqVRQHu1oV6VUkBrhi2vNvMx3bNDnUCc 18191582
## 3 QmaJSe4JD7wJvA5syMPxvvp8TNGY7GtBa9kUxALR8RYmhp 7113612
## 4 QmcMA5jZGTPuNrg7Zx6bE8jXXVTR31sLk5EdvEm6zxj29e 6830216
## 5 QmSaKiqQa5y2qGmSdYTYZh2xd4q1T5fBodwdGrWuBXg9vC 6309293
## 6 QmZsgJLiLQKpb8hxTmQ5LWyrFVvfWzVaL4WK8dfFBn7EeK 5483533
## 7 QmYrEJKHphWBGkqPkEVKSZR9gsoD6RtJs3g3R8iWVhH66Z 5078083
## 8 QmTZ8ejXJxRo7vDBS4uwqBeGoxLSWbhaA7oXa1RvxunLy7 5053823
## 9 QmVbhUD9ev2cHhqdxkm4L8wFbpQsTeCBaVWBYpP1m8CdwC 4921381
## 10 QmQEYSGSD8t7jTw4gS2dwC4DLvyZceR9fYQ432Ff1hZpCp 4374908
## # ℹ 2,667 more rows
Join results back to the list of synced subgraphs:
# Join the 48h query volume results back to the synced list
# Ensure previous joins haven't created duplicate deployment columns
%<>%
synced_subgraphs select(-any_of(c("sum_queries", "queries_per_indexer"))) %>% # Remove old cols if they exist
left_join(query_volume_48h, by='deployment')
# Optional: Join historical volume if needed elsewhere
# synced_subgraphs %<>% left_join(filtered_query_volume, by='deployment', suffix = c("", "_hist"))
## # A tibble: 258 × 6
## deployment unique_indexers signalled_tokens total_allocated_tokens
## <chr> <int> <dbl> <dbl>
## 1 QmTyyrcaTduZN1ySRwtV… 5 9902. 2369230
## 2 QmWXhLkz6fRJwLyFmgBK… 13 6931. 1938503.
## 3 QmVUcp8WqE47payYxMJU… 9 4554. 1048922
## 4 QmdKXcBUHR3UyURqVRQH… 25 33256. 8813353.
## 5 QmW26TG5s9myd1gzio9f… 15 4843. 1316928.
## 6 QmYSxAAWeXDJ8DbWTy3o… 11 2976. 707341
## 7 QmYV4MbMHuE98rWL5HRJ… 6 495. 117677
## 8 QmddvgpoNsfxXyQ972Kh… 8 1981. 470963
## 9 QmSK5hvEsM5mw6658SE1… 10 4952. 1167837
## 10 QmQuCY9bwTF13ZkPdpgy… NA NA NA
## # ℹ 248 more rows
## # ℹ 2 more variables: rewards_proportion <dbl>, sum_queries_48h <int>
Calculate the number of queries available per indexer (over 48h):
%<>%
synced_subgraphs # Calculate queries per indexer based on 48h data
# Ensure unique_indexers is not 0 or NA to avoid division errors
# Replace NA sum_queries_48h with 0
mutate(sum_queries_48h = ifelse(is.na(sum_queries_48h), 0, sum_queries_48h)) %>%
mutate(queries_per_indexer_48h = ifelse(is.na(unique_indexers) | unique_indexers == 0, 0, sum_queries_48h / unique_indexers))
# Optional: Calculate historical queries per indexer if needed
# mutate(queries_per_indexer_hist = ifelse(is.na(unique_indexers) | unique_indexers == 0, 0, sum_queries_hist / unique_indexers))
## # A tibble: 258 × 7
## deployment unique_indexers signalled_tokens total_allocated_tokens
## <chr> <int> <dbl> <dbl>
## 1 QmTyyrcaTduZN1ySRwtV… 5 9902. 2369230
## 2 QmWXhLkz6fRJwLyFmgBK… 13 6931. 1938503.
## 3 QmVUcp8WqE47payYxMJU… 9 4554. 1048922
## 4 QmdKXcBUHR3UyURqVRQH… 25 33256. 8813353.
## 5 QmW26TG5s9myd1gzio9f… 15 4843. 1316928.
## 6 QmYSxAAWeXDJ8DbWTy3o… 11 2976. 707341
## 7 QmYV4MbMHuE98rWL5HRJ… 6 495. 117677
## 8 QmddvgpoNsfxXyQ972Kh… 8 1981. 470963
## 9 QmSK5hvEsM5mw6658SE1… 10 4952. 1167837
## 10 QmQuCY9bwTF13ZkPdpgy… NA NA NA
## # ℹ 248 more rows
## # ℹ 3 more variables: rewards_proportion <dbl>, sum_queries_48h <dbl>,
## # queries_per_indexer_48h <dbl>
save.image('/root/github/indexer_analytics_tutorial/data/chapters_snapshots/07-query_fee_proportions.RData')