18 Calculate New Allocations

# Report the indexer's currently available (unallocated) stake and total stake.
print(paste('available tokens', available_tokens$available_stake ))
## [1] "available tokens 40.2845307029979"
print(paste('total stake', available_tokens$total_stake ))
## [1] "total stake 2716940.2845307"
# TEMP DELETE - set available_tokens to 2M - used to test new logic and simulate available GRT without affecting live indexer operation
# NOTE(review): these overrides replace the live values printed above; remove
# before running against a production indexer.
available_tokens$available_stake = 2000000
available_tokens$total_stake = 3000000
print(paste('available tokens', available_tokens$available_stake ))
## [1] "available tokens 2000000"
print(paste('total stake', available_tokens$total_stake ))
## [1] "total stake 3000000"
  • add a tiered baseline allocation (3k / 7k / 13k GRT at 10k / 100k / 1M queries per indexer over 48h) to qualifying subgraphs
  • add 100 at a time to the best one
  • re-calculate proportions and keep adding 100 at a time
  • when a single one reaches 10% of total, stop adding to that one
  • add to others using the same approach until out of available tokens

First, let’s set all new allocations to 0 as a start:

# Initialize every subgraph's new allocation at zero before applying baselines.
synced_subgraphs <- synced_subgraphs %>% mutate(new_allocation = 0)

Next, let’s establish a maximum allocation size as 10% of our total stake. This helps spread our allocations across a larger number of subgraphs and keeps our APR from fluctuating as much after we have set the allocations:

# Cap any single allocation at 10% of total stake to spread stake across
# more subgraphs and dampen post-allocation APR swings.
max_allocation <- available_tokens$total_stake * 0.1
## [1] 300000

Before the next step, let’s make sure to fill in NAs with 0’s to avoid issues when calculating the new allocations:

# Columns where NA must be treated as zero before the allocation math below;
# covers both the original metrics and the newer 48h query metrics.
na_zero_defaults <- list(
  queries_per_indexer = 0,
  unique_indexers = 0,
  signalled_tokens = 0,
  total_allocated_tokens = 0,
  rewards_proportion = 0,
  sum_queries = 0,
  sum_queries_48h = 0,
  queries_per_indexer_48h = 0
)
synced_subgraphs <- replace_na(synced_subgraphs, na_zero_defaults)

Now let’s apply the tiered baseline allocations based on 48h query volume.

# --- START: New Tiered Baseline Allocation ---
# Seed each subgraph with a baseline allocation keyed to its 48h query volume.
# case_when evaluates conditions top-down, so the highest tier must come first.
synced_subgraphs <- synced_subgraphs %>%
  mutate(
    new_allocation = case_when(
      queries_per_indexer_48h >= 1e6 ~ 13000, # >= 1M queries (48h): 13k GRT
      queries_per_indexer_48h >= 1e5 ~ 7000,  # >= 100k queries (48h): 7k GRT
      queries_per_indexer_48h >= 1e4 ~ 3000,  # >= 10k queries (48h): 3k GRT
      TRUE                           ~ 0      # below every tier: no baseline
    )
  )
# --- END: New Tiered Baseline Allocation ---

# Display subgraphs receiving baseline allocations
print("Subgraphs receiving baseline allocations:")
## [1] "Subgraphs receiving baseline allocations:"
# Columns shown: deployment hash, 48h query volume per indexer, tiered baseline.
print(synced_subgraphs %>% filter(new_allocation > 0) %>% select(deployment, queries_per_indexer_48h, new_allocation))
## # A tibble: 12 × 3
##    deployment                              queries_per_indexer_…¹ new_allocation
##    <chr>                                                    <dbl>          <dbl>
##  1 QmWXhLkz6fRJwLyFmgBKVu2NyMD6MqtGhcrt8b…                 12953.           3000
##  2 QmdKXcBUHR3UyURqVRQHu1oV6VUkBrhi2vNvMx…                727663.           7000
##  3 QmYwkNTGCHFu2Q1Kmmd5TrcqvruLMjKjzsGVJa…                 12174.           3000
##  4 QmXhqwzJZDgnAAVsEYYhg7H46wZQqCMnvXhKio…                 10898.           3000
##  5 QmXiRbvCs9aCt7PzkFE26SE4usDLJLeKhDWNpZ…                 77728.           3000
##  6 QmPbAPGuq8PT5qMW2dtKCZoxW3Rpa2kFfr3qdd…                 39286.           3000
##  7 QmQtNd36amtQ8h8GF5rwkLLWyyBGwqad3j3WgZ…                 14240.           3000
##  8 QmYWvmm6rxvAk8E3cA6iXPhC6ETBLJFuEw8maY…                 30691.           3000
##  9 Qmb27RY3RqP98UMKbTgScf6F7hhokfMuS9fV7V…                147025.           7000
## 10 QmeHZcGmj7wrJMHRrPVDGiVaprs7t21n1jjeUH…                 23850            3000
## 11 QmRbn71wTNK3PmEb62wUK4G1XmKN14ZbHeTgi5…                211215.           7000
## 12 QmUhiH6Z5xo6o3GNzsSvqpGKLmCt6w5WzKQ1yH…                 74799.           3000
## # ℹ abbreviated name: ¹​queries_per_indexer_48h
# Calculate remaining tokens after baseline allocation
total_baseline_allocation <- sum(synced_subgraphs$new_allocation, na.rm = TRUE)

print(paste("Total baseline allocation:", total_baseline_allocation))
## [1] "Total baseline allocation: 48000"
print(paste("Available stake:", available_tokens$available_stake))
## [1] "Available stake: 2000000"
# Guard against the baselines alone exceeding the stake we can allocate;
# in that case the iterative top-up loop below is skipped entirely.
if (total_baseline_allocation > available_tokens$available_stake) {
  # Optional: Add logic here to scale down baseline allocations proportionally if needed
  # For now, we'll just cap remaining_tokens at 0 and skip iterative allocation
  warning("Total baseline allocation exceeds available stake. Skipping iterative allocation.")
  remaining_tokens <- 0
} else {
  remaining_tokens <- available_tokens$available_stake - total_baseline_allocation
}
print(paste("Remaining tokens for iterative allocation:", remaining_tokens))
## [1] "Remaining tokens for iterative allocation: 1952000"
# # Sorting here is not strictly necessary before the loop, as the loop recalculates best row each time
# synced_subgraphs %<>% arrange(desc(rewards_proportion))


# calculate new allocations - actual calculation -----------------------------------------------

# Work on a trimmed copy of synced_subgraphs (baselines already applied) that
# keeps only the columns the allocation loop needs. remaining_tokens was
# already computed and floored at 0 above, so no further adjustment is needed.
data <- synced_subgraphs %>%
  select(
    deployment,
    signalled_tokens,
    total_allocated_tokens,
    rewards_proportion,
    queries_per_indexer_48h, # 48h metric, kept for reference in the output
    new_allocation           # already holds the tiered baselines
  )

# Function to calculate potential rewards_proportion
# Given a one-row data frame, returns signal / (network allocation + our
# allocation + additional_allocation). Collapses to 0 whenever the ratio is
# undefined (NA inputs or a zero denominator) so callers can rank rows safely.
calculate_rewards_proportion <- function(row, additional_allocation) {
  stake_on_subgraph <- row$total_allocated_tokens + row$new_allocation +
    additional_allocation
  signal <- as.numeric(row$signalled_tokens)

  # Degenerate or missing values yield a 0 proportion rather than NA/Inf.
  if (is.na(stake_on_subgraph) || stake_on_subgraph == 0 || is.na(signal)) {
    return(0)
  }

  signal / stake_on_subgraph
}

# Iteratively distribute the remaining stake 100 GRT at a time, always topping
# up the subgraph whose rewards ratio (signal / total stake on subgraph) would
# be highest after the addition, until tokens run out or every subgraph has
# reached the 10%-of-stake cap (max_allocation).
if (nrow(data) == 0) {
  print("Error: data is empty. Skipping allocation loop.")
} else if (remaining_tokens < 100) {
  print("No remaining tokens for iterative allocation.")
} else {
  print("Starting iterative allocation loop...")
  # Signal is constant throughout the loop; coerce it to numeric once.
  signal <- as.numeric(data$signalled_tokens)
  while (remaining_tokens >= 100) {
    # Vectorized potential rewards_proportion if 100 tokens were added to each
    # row. This replaces the previous sapply(1:nrow(data), ...) approach, which
    # rebuilt a one-row data.frame per subgraph on every loop iteration and
    # made the loop accidentally quadratic in practice. The semantics match
    # calculate_rewards_proportion(row, 100): NA or zero denominators map to 0.
    denom <- data$total_allocated_tokens + data$new_allocation + 100
    data$potential_rewards <- ifelse(is.na(signal) | is.na(denom) | denom == 0,
                                     0, signal / denom)

    # Rows still allowed to take another 100 tokens without breaching the cap.
    # (alloc + 100 <= max implies alloc < max, so one comparison suffices.)
    eligible_rows <- which(data$new_allocation + 100 <= max_allocation)

    # If no eligible rows, break the loop
    if (length(eligible_rows) == 0) {
      print("No eligible rows for further allocation. Breaking loop.")
      break
    }

    # Pick the eligible row with the highest potential rewards_proportion.
    # which.max always returns a single index (the first on ties), so no
    # separate tie-breaking step is needed.
    best_row_index <- eligible_rows[which.max(data$potential_rewards[eligible_rows])]

    # Allocate 100 tokens to the best row. total_allocated_tokens represents
    # network-wide stake and is deliberately NOT modified here.
    data$new_allocation[best_row_index] <- data$new_allocation[best_row_index] + 100
    remaining_tokens <- remaining_tokens - 100

    # Refresh the stored rewards_proportion for the row we just topped up
    # (0 additional tokens: the allocation increase is already in new_allocation).
    data$rewards_proportion[best_row_index] <-
      calculate_rewards_proportion(data[best_row_index, ], 0)
  }
  print("Finished iterative allocation loop.")
}
## [1] "Starting iterative allocation loop..."
## [1] "Finished iterative allocation loop."
# Print final allocation summary
print(paste("Total rows considered:", nrow(data)))
## [1] "Total rows considered: 258"
print(paste("Rows with non-zero allocation:", sum(data$new_allocation > 0, na.rm = TRUE)))
## [1] "Rows with non-zero allocation: 92"
# Sanity check: baseline + iterative should sum to the (simulated) available stake.
print(paste("Total allocated (baseline + iterative):", sum(data$new_allocation, na.rm = TRUE)))
## [1] "Total allocated (baseline + iterative): 2000000"
print(paste("Final remaining tokens:", remaining_tokens))
## [1] "Final remaining tokens: 0"
# Copy the final new allocations from 'data' back onto synced_subgraphs.
# Dropping any stale new_allocation column first keeps the join from producing
# duplicate-suffixed columns; subgraphs absent from 'data' default to 0.
synced_subgraphs <- synced_subgraphs %>%
  select(-any_of("new_allocation")) %>%
  left_join(select(data, deployment, new_allocation), by = "deployment") %>%
  mutate(new_allocation = coalesce(new_allocation, 0))
# Final sorted dataset
# Final table: only subgraphs that received tokens, largest allocation first.
final_allocations_data <- data %>%
  filter(new_allocation > 0) %>%
  arrange(desc(new_allocation))
# show final data
print(final_allocations_data)
## # A tibble: 92 × 7
##    deployment         signalled_tokens total_allocated_tokens rewards_proportion
##    <chr>                         <dbl>                  <dbl>              <dbl>
##  1 QmTExu7dbmcZBaG4b…            2970.                535917             0.00422
##  2 QmRT7M2wHbkFdC9N9…             704.                  7023             0.00422
##  3 QmarbBQvPpbfG1juC…            4865.               1061400             0.00422
##  4 QmP1uNJnsz2v4ynvo…            9877.               2251854.            0.00422
##  5 QmVtjbRWU3jAPK48T…            2978.                634418             0.00422
##  6 QmcykahJW7hPPpvZK…            2971.                633462.            0.00422
##  7 QmYzXQTUFhqwnDFxS…            1079.                193371             0.00422
##  8 QmQtNd36amtQ8h8GF…            5120.               1162311             0.00422
##  9 QmcPi2qpDKaQuZdmk…            3005.                663567             0.00422
## 10 QmV5QTCUK8Zs6k6pj…            5011.               1141111             0.00422
## # ℹ 82 more rows
## # ℹ 3 more variables: queries_per_indexer_48h <dbl>, new_allocation <dbl>,
## #   potential_rewards <dbl>

A table version of the results, so they are easier to inspect:

# Interactive table of the final allocations, with copy/CSV/Excel/PDF export.
table_options <- list(
  scrollX = TRUE,
  scrollY = "500px",
  paging = TRUE,
  searching = TRUE,
  ordering = TRUE,
  dom = 'Bfrtip',
  buttons = c('copy', 'csv', 'excel', 'pdf'),
  pageLength = 50
)
datatable(final_allocations_data,
          escape = FALSE,
          extensions = "Buttons",
          options = table_options)
# Persist the whole workspace so later chapters can resume from this snapshot.
save.image('/root/github/indexer_analytics_tutorial/data/chapters_snapshots/10-calculate_allocations.RData')