18 Calculate New Allocations

# Report the indexer's currently available (unallocated) stake and total stake.
print(paste('available tokens', available_tokens$available_stake ))
## [1] "available tokens 40.2845307029979"
print(paste('total stake', available_tokens$total_stake ))
## [1] "total stake 2716940.2845307"
# TEMP DELETE - set available_tokens to 2M - used to test new logic and simulate available GRT without affecting live indexer operation
# NOTE(review): these overrides replace the live values printed above; remove
# before running against a production indexer.
available_tokens$available_stake = 2000000
available_tokens$total_stake = 3000000
print(paste('available tokens', available_tokens$available_stake ))
## [1] "available tokens 2000000"
print(paste('total stake', available_tokens$total_stake ))
## [1] "total stake 3000000"
  • add a tiered baseline allocation (3k / 7k / 13k GRT at 10k / 100k / 1M queries per indexer over 48h) to qualifying subgraphs
  • add 100 at a time to the best one
  • re-calculate proportions and keep adding 100 at a time
  • when a single one reaches 10% of total, stop adding to that one
  • add to others using the same approach until out of available tokens

First, let’s set all new allocations to 0 as a start:

# Initialize every subgraph's new allocation at zero before applying baselines.
synced_subgraphs <- synced_subgraphs %>% mutate(new_allocation = 0)

Next, let’s establish a maximum allocation size as 10% of our total stake. This helps spread our allocations across a larger number of subgraphs and keeps our APR from fluctuating as much after we have set the allocations:

# Cap any single allocation at 10% of total stake to spread stake across
# more subgraphs and dampen post-allocation APR swings.
max_allocation <- available_tokens$total_stake * 0.1
## [1] 300000

Before the next step, let’s make sure to fill in NAs with 0’s to avoid issues when calculating the new allocations:

# Columns where NA must be treated as zero before the allocation math below;
# covers both the original metrics and the newer 48h query metrics.
na_zero_defaults <- list(
  queries_per_indexer = 0,
  unique_indexers = 0,
  signalled_tokens = 0,
  total_allocated_tokens = 0,
  rewards_proportion = 0,
  sum_queries = 0,
  sum_queries_48h = 0,
  queries_per_indexer_48h = 0
)
synced_subgraphs <- replace_na(synced_subgraphs, na_zero_defaults)

Now let’s apply the tiered baseline allocations based on 48h query volume.

# --- START: New Tiered Baseline Allocation ---
# Seed each subgraph with a baseline allocation keyed to its 48h query volume.
# case_when evaluates conditions top-down, so the highest tier must come first.
synced_subgraphs <- synced_subgraphs %>%
  mutate(
    new_allocation = case_when(
      queries_per_indexer_48h >= 1e6 ~ 13000, # >= 1M queries (48h): 13k GRT
      queries_per_indexer_48h >= 1e5 ~ 7000,  # >= 100k queries (48h): 7k GRT
      queries_per_indexer_48h >= 1e4 ~ 3000,  # >= 10k queries (48h): 3k GRT
      TRUE                           ~ 0      # below every tier: no baseline
    )
  )
# --- END: New Tiered Baseline Allocation ---

# Display subgraphs receiving baseline allocations
print("Subgraphs receiving baseline allocations:")
## [1] "Subgraphs receiving baseline allocations:"
# Columns shown: deployment hash, 48h query volume per indexer, tiered baseline.
print(synced_subgraphs %>% filter(new_allocation > 0) %>% select(deployment, queries_per_indexer_48h, new_allocation))
## # A tibble: 12 × 3
##    deployment                              queries_per_indexer_…¹ new_allocation
##    <chr>                                                    <dbl>          <dbl>
##  1 QmWXhLkz6fRJwLyFmgBKVu2NyMD6MqtGhcrt8b…                 12953.           3000
##  2 QmdKXcBUHR3UyURqVRQHu1oV6VUkBrhi2vNvMx…                727663.           7000
##  3 QmYwkNTGCHFu2Q1Kmmd5TrcqvruLMjKjzsGVJa…                 12174.           3000
##  4 QmXhqwzJZDgnAAVsEYYhg7H46wZQqCMnvXhKio…                 10898.           3000
##  5 QmXiRbvCs9aCt7PzkFE26SE4usDLJLeKhDWNpZ…                 77728.           3000
##  6 QmPbAPGuq8PT5qMW2dtKCZoxW3Rpa2kFfr3qdd…                 39286.           3000
##  7 QmQtNd36amtQ8h8GF5rwkLLWyyBGwqad3j3WgZ…                 14240.           3000
##  8 QmYWvmm6rxvAk8E3cA6iXPhC6ETBLJFuEw8maY…                 30691.           3000
##  9 Qmb27RY3RqP98UMKbTgScf6F7hhokfMuS9fV7V…                147025.           7000
## 10 QmeHZcGmj7wrJMHRrPVDGiVaprs7t21n1jjeUH…                 23850            3000
## 11 QmRbn71wTNK3PmEb62wUK4G1XmKN14ZbHeTgi5…                211215.           7000
## 12 QmUhiH6Z5xo6o3GNzsSvqpGKLmCt6w5WzKQ1yH…                 74799.           3000
## # ℹ abbreviated name: ¹​queries_per_indexer_48h
# Calculate remaining tokens after baseline allocation
total_baseline_allocation <- sum(synced_subgraphs$new_allocation, na.rm = TRUE)

print(paste("Total baseline allocation:", total_baseline_allocation))
## [1] "Total baseline allocation: 48000"
print(paste("Available stake:", available_tokens$available_stake))
## [1] "Available stake: 2000000"
# Guard against the baselines alone exceeding the stake we can allocate;
# in that case the iterative top-up loop below is skipped entirely.
if (total_baseline_allocation > available_tokens$available_stake) {
  # Optional: Add logic here to scale down baseline allocations proportionally if needed
  # For now, we'll just cap remaining_tokens at 0 and skip iterative allocation
  warning("Total baseline allocation exceeds available stake. Skipping iterative allocation.")
  remaining_tokens <- 0
} else {
  remaining_tokens <- available_tokens$available_stake - total_baseline_allocation
}
print(paste("Remaining tokens for iterative allocation:", remaining_tokens))
## [1] "Remaining tokens for iterative allocation: 1952000"
# # Sorting here is not strictly necessary before the loop, as the loop recalculates best row each time
# synced_subgraphs %<>% arrange(desc(rewards_proportion))


# calculate new allocations - actual calculation -----------------------------------------------

# Work on a trimmed copy of synced_subgraphs (baselines already applied) that
# keeps only the columns the allocation loop needs. remaining_tokens was
# already computed and floored at 0 above, so no further adjustment is needed.
data <- synced_subgraphs %>%
  select(
    deployment,
    signalled_tokens,
    total_allocated_tokens,
    rewards_proportion,
    queries_per_indexer_48h, # 48h metric, kept for reference in the output
    new_allocation           # already holds the tiered baselines
  )

# Function to calculate potential rewards_proportion
# Given a one-row data frame, returns signal / (network allocation + our
# allocation + additional_allocation). Collapses to 0 whenever the ratio is
# undefined (NA inputs or a zero denominator) so callers can rank rows safely.
calculate_rewards_proportion <- function(row, additional_allocation) {
  stake_on_subgraph <- row$total_allocated_tokens + row$new_allocation +
    additional_allocation
  signal <- as.numeric(row$signalled_tokens)

  # Degenerate or missing values yield a 0 proportion rather than NA/Inf.
  if (is.na(stake_on_subgraph) || stake_on_subgraph == 0 || is.na(signal)) {
    return(0)
  }

  signal / stake_on_subgraph
}

# Iteratively distribute the remaining stake 100 GRT at a time, always topping
# up the subgraph whose rewards ratio (signal / total stake on subgraph) would
# be highest after the addition, until tokens run out or every subgraph has
# reached the 10%-of-stake cap (max_allocation).
if (nrow(data) == 0) {
  print("Error: data is empty. Skipping allocation loop.")
} else if (remaining_tokens < 100) {
  print("No remaining tokens for iterative allocation.")
} else {
  print("Starting iterative allocation loop...")
  # Signal is constant throughout the loop; coerce it to numeric once.
  signal <- as.numeric(data$signalled_tokens)
  while (remaining_tokens >= 100) {
    # Vectorized potential rewards_proportion if 100 tokens were added to each
    # row. This replaces the previous sapply(1:nrow(data), ...) approach, which
    # rebuilt a one-row data.frame per subgraph on every loop iteration and
    # made the loop accidentally quadratic in practice. The semantics match
    # calculate_rewards_proportion(row, 100): NA or zero denominators map to 0.
    denom <- data$total_allocated_tokens + data$new_allocation + 100
    data$potential_rewards <- ifelse(is.na(signal) | is.na(denom) | denom == 0,
                                     0, signal / denom)

    # Rows still allowed to take another 100 tokens without breaching the cap.
    # (alloc + 100 <= max implies alloc < max, so one comparison suffices.)
    eligible_rows <- which(data$new_allocation + 100 <= max_allocation)

    # If no eligible rows, break the loop
    if (length(eligible_rows) == 0) {
      print("No eligible rows for further allocation. Breaking loop.")
      break
    }

    # Pick the eligible row with the highest potential rewards_proportion.
    # which.max always returns a single index (the first on ties), so no
    # separate tie-breaking step is needed.
    best_row_index <- eligible_rows[which.max(data$potential_rewards[eligible_rows])]

    # Allocate 100 tokens to the best row. total_allocated_tokens represents
    # network-wide stake and is deliberately NOT modified here.
    data$new_allocation[best_row_index] <- data$new_allocation[best_row_index] + 100
    remaining_tokens <- remaining_tokens - 100

    # Refresh the stored rewards_proportion for the row we just topped up
    # (0 additional tokens: the allocation increase is already in new_allocation).
    data$rewards_proportion[best_row_index] <-
      calculate_rewards_proportion(data[best_row_index, ], 0)
  }
  print("Finished iterative allocation loop.")
}
## [1] "Starting iterative allocation loop..."
## [1] "Finished iterative allocation loop."
# Print final allocation summary
print(paste("Total rows considered:", nrow(data)))
## [1] "Total rows considered: 258"
print(paste("Rows with non-zero allocation:", sum(data$new_allocation > 0, na.rm = TRUE)))
## [1] "Rows with non-zero allocation: 92"
# Sanity check: baseline + iterative should sum to the (simulated) available stake.
print(paste("Total allocated (baseline + iterative):", sum(data$new_allocation, na.rm = TRUE)))
## [1] "Total allocated (baseline + iterative): 2000000"
print(paste("Final remaining tokens:", remaining_tokens))
## [1] "Final remaining tokens: 0"
# Copy the final new allocations from 'data' back onto synced_subgraphs.
# Dropping any stale new_allocation column first keeps the join from producing
# duplicate-suffixed columns; subgraphs absent from 'data' default to 0.
synced_subgraphs <- synced_subgraphs %>%
  select(-any_of("new_allocation")) %>%
  left_join(select(data, deployment, new_allocation), by = "deployment") %>%
  mutate(new_allocation = coalesce(new_allocation, 0))
# Final sorted dataset
# Final table: only subgraphs that received tokens, largest allocation first.
final_allocations_data <- data %>%
  filter(new_allocation > 0) %>%
  arrange(desc(new_allocation))
# show final data
print(final_allocations_data)
## # A tibble: 92 × 7
##    deployment         signalled_tokens total_allocated_tokens rewards_proportion
##    <chr>                         <dbl>                  <dbl>              <dbl>
##  1 QmTExu7dbmcZBaG4b…            2970.                535917             0.00422
##  2 QmRT7M2wHbkFdC9N9…             704.                  7023             0.00422
##  3 QmarbBQvPpbfG1juC…            4865.               1061400             0.00422
##  4 QmP1uNJnsz2v4ynvo…            9877.               2251854.            0.00422
##  5 QmVtjbRWU3jAPK48T…            2978.                634418             0.00422
##  6 QmcykahJW7hPPpvZK…            2971.                633462.            0.00422
##  7 QmYzXQTUFhqwnDFxS…            1079.                193371             0.00422
##  8 QmQtNd36amtQ8h8GF…            5120.               1162311             0.00422
##  9 QmcPi2qpDKaQuZdmk…            3005.                663567             0.00422
## 10 QmV5QTCUK8Zs6k6pj…            5011.               1141111             0.00422
## # ℹ 82 more rows
## # ℹ 3 more variables: queries_per_indexer_48h <dbl>, new_allocation <dbl>,
## #   potential_rewards <dbl>

A table version of the results, so they are easier to inspect:

# Interactive table of the final allocations, with copy/CSV/Excel/PDF export.
table_options <- list(
  scrollX = TRUE,
  scrollY = "500px",
  paging = TRUE,
  searching = TRUE,
  ordering = TRUE,
  dom = 'Bfrtip',
  buttons = c('copy', 'csv', 'excel', 'pdf'),
  pageLength = 50
)
datatable(final_allocations_data,
          escape = FALSE,
          extensions = "Buttons",
          options = table_options)
# Persist the whole workspace so later chapters can resume from this snapshot.
save.image('/root/github/indexer_analytics_tutorial/data/chapters_snapshots/10-calculate_allocations.RData')