15 Query Fee Proportions

Summarize the query volume data

# Calculate query volume over the last 48 hours
# Ensure lubridate is loaded for days() or use base R equivalent
# library(lubridate)
latest_date <- max(query_volume$date)
# Use base R approach for date subtraction to avoid dependency
start_date_48h <- as.Date(latest_date) - 2

query_volume_48h <- query_volume %>%
  filter(date >= start_date_48h) %>%
  # rename subgraph_id to deployment (to match our other data sources)
  rename(deployment = subgraph_deployment_ipfs_hash) %>%
  # group by subgraph deployment
  group_by(deployment) %>%
  # sum the number of queries by deployment over the 48h period
  # Use na.rm=TRUE just in case query_count has NAs
  summarize(sum_queries_48h = sum(query_count, na.rm = TRUE)) %>%
  # sort the data by deployments with largest number of queries first
  arrange(desc(sum_queries_48h))

# Optional: Keep historical calculation if needed elsewhere
# filtered_query_volume = query_volume %>% ... (original calculation) ...
## # A tibble: 2,677 × 2
##    deployment                                     sum_queries_48h
##    <chr>                                                    <int>
##  1 QmbeQwnRSX4Fs9Bo3LZCAvkb4psdmcS8AqKv78Tcq6ohED        28471408
##  2 QmdKXcBUHR3UyURqVRQHu1oV6VUkBrhi2vNvMx3bNDnUCc        18191582
##  3 QmaJSe4JD7wJvA5syMPxvvp8TNGY7GtBa9kUxALR8RYmhp         7113612
##  4 QmcMA5jZGTPuNrg7Zx6bE8jXXVTR31sLk5EdvEm6zxj29e         6830216
##  5 QmSaKiqQa5y2qGmSdYTYZh2xd4q1T5fBodwdGrWuBXg9vC         6309293
##  6 QmZsgJLiLQKpb8hxTmQ5LWyrFVvfWzVaL4WK8dfFBn7EeK         5483533
##  7 QmYrEJKHphWBGkqPkEVKSZR9gsoD6RtJs3g3R8iWVhH66Z         5078083
##  8 QmTZ8ejXJxRo7vDBS4uwqBeGoxLSWbhaA7oXa1RvxunLy7         5053823
##  9 QmVbhUD9ev2cHhqdxkm4L8wFbpQsTeCBaVWBYpP1m8CdwC         4921381
## 10 QmQEYSGSD8t7jTw4gS2dwC4DLvyZceR9fYQ432Ff1hZpCp         4374908
## # ℹ 2,667 more rows

Join results back to the list of synced subgraphs:

# Join the 48h query volume results back to the synced list
# Ensure previous joins haven't created duplicate deployment columns
synced_subgraphs %<>%
  select(-any_of(c("sum_queries", "queries_per_indexer"))) %>% # Remove old cols if they exist
  left_join(query_volume_48h, by='deployment')

# Optional: Join historical volume if needed elsewhere
# synced_subgraphs %<>% left_join(filtered_query_volume, by='deployment', suffix = c("", "_hist"))
## # A tibble: 258 × 6
##    deployment            unique_indexers signalled_tokens total_allocated_tokens
##    <chr>                           <int>            <dbl>                  <dbl>
##  1 QmTyyrcaTduZN1ySRwtV…               5            9902.               2369230 
##  2 QmWXhLkz6fRJwLyFmgBK…              13            6931.               1938503.
##  3 QmVUcp8WqE47payYxMJU…               9            4554.               1048922 
##  4 QmdKXcBUHR3UyURqVRQH…              25           33256.               8813353.
##  5 QmW26TG5s9myd1gzio9f…              15            4843.               1316928.
##  6 QmYSxAAWeXDJ8DbWTy3o…              11            2976.                707341 
##  7 QmYV4MbMHuE98rWL5HRJ…               6             495.                117677 
##  8 QmddvgpoNsfxXyQ972Kh…               8            1981.                470963 
##  9 QmSK5hvEsM5mw6658SE1…              10            4952.               1167837 
## 10 QmQuCY9bwTF13ZkPdpgy…              NA              NA                     NA 
## # ℹ 248 more rows
## # ℹ 2 more variables: rewards_proportion <dbl>, sum_queries_48h <int>

Calculate the number of queries available per indexer (over 48h):

synced_subgraphs %<>%
  # Calculate queries per indexer based on 48h data
  # Ensure unique_indexers is not 0 or NA to avoid division errors
  # Replace NA sum_queries_48h with 0
  mutate(sum_queries_48h = ifelse(is.na(sum_queries_48h), 0, sum_queries_48h)) %>%
  mutate(queries_per_indexer_48h = ifelse(is.na(unique_indexers) | unique_indexers == 0, 0, sum_queries_48h / unique_indexers))

# Optional: Calculate historical queries per indexer if needed
# mutate(queries_per_indexer_hist = ifelse(is.na(unique_indexers) | unique_indexers == 0, 0, sum_queries_hist / unique_indexers))
## # A tibble: 258 × 7
##    deployment            unique_indexers signalled_tokens total_allocated_tokens
##    <chr>                           <int>            <dbl>                  <dbl>
##  1 QmTyyrcaTduZN1ySRwtV…               5            9902.               2369230 
##  2 QmWXhLkz6fRJwLyFmgBK…              13            6931.               1938503.
##  3 QmVUcp8WqE47payYxMJU…               9            4554.               1048922 
##  4 QmdKXcBUHR3UyURqVRQH…              25           33256.               8813353.
##  5 QmW26TG5s9myd1gzio9f…              15            4843.               1316928.
##  6 QmYSxAAWeXDJ8DbWTy3o…              11            2976.                707341 
##  7 QmYV4MbMHuE98rWL5HRJ…               6             495.                117677 
##  8 QmddvgpoNsfxXyQ972Kh…               8            1981.                470963 
##  9 QmSK5hvEsM5mw6658SE1…              10            4952.               1167837 
## 10 QmQuCY9bwTF13ZkPdpgy…              NA              NA                     NA 
## # ℹ 248 more rows
## # ℹ 3 more variables: rewards_proportion <dbl>, sum_queries_48h <dbl>,
## #   queries_per_indexer_48h <dbl>
save.image('/root/github/indexer_analytics_tutorial/data/chapters_snapshots/07-query_fee_proportions.RData')