main.go (16111B)
1 package main 2 3 import ( 4 "encoding/csv" 5 "fmt" 6 "git.nunosempere.com/NunoSempere/US-2024/core" 7 "math" 8 rand "math/rand/v2" 9 "os" 10 "strconv" 11 "time" 12 // "strings" 13 ) 14 15 /* Structs */ 16 type State struct { 17 Name string 18 Votes int 19 PresidentialElectoralHistory map[string]string 20 Polls []Poll 21 } 22 23 type VotesForEachParty struct { 24 Democrats int 25 Republicans int 26 } 27 28 type Poll struct { 29 PollId string 30 SampleSize int 31 PollResults map[string]float64 32 Date time.Time 33 Partisan string 34 } 35 36 /* Globals */ 37 var r = rand.New(rand.NewPCG(uint64(100), uint64(2224))) 38 var dem_nominee_name = "Biden" 39 var rep_nominee_name = "Trump" 40 41 /* Sampling helper functions */ 42 func getNormalCDF(x float64, mean float64, std float64) float64 { 43 erf_term := (x - mean) / (std * math.Sqrt2) 44 return (1 + math.Erf(erf_term)) / 2 45 } 46 47 func getProbabilityAboveX(x float64, mean float64, std float64) float64 { 48 return 1 - getNormalCDF(x, mean, std) 49 } 50 51 func getChanceCandidateWinsFromPollShare(candidate_p float64, poll_sample_size float64) float64 { 52 std := math.Sqrt(candidate_p * (1 - candidate_p) / poll_sample_size) // https://stats.stackexchange.com/questions/258879/how-to-interpret-margin-of-error-in-a-non-binary-poll 53 return getProbabilityAboveX(0.5, candidate_p, std) 54 } 55 56 func getChanceRepublicanWinFromPoll(poll Poll, pretty_print bool) float64 { 57 biden_percentage, biden_exists := poll.PollResults[dem_nominee_name] 58 trump_percentage, trump_exists := poll.PollResults[rep_nominee_name] 59 if !biden_exists || !trump_exists { 60 panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump") 61 } 62 biden_share := biden_percentage / 100.0 // will panic if the item is not found, but we've previously filtered for it 63 trump_share := trump_percentage / 100.0 64 65 normalized_trump_share := trump_share / (trump_share + biden_share) 66 normalized_biden_share := biden_share / (trump_share + biden_share) 67 68 joint_trump_biden_sample_size := (biden_share + trump_share) * float64(poll.SampleSize) 69 std_error_poll_mean := math.Sqrt((normalized_trump_share * normalized_biden_share) / joint_trump_biden_sample_size) 70 71 p_republican_win := getProbabilityAboveX(0.5, normalized_trump_share, std_error_poll_mean) 72 73 if pretty_print { 74 fmt.Printf("\n\t\tSample size: %f", joint_trump_biden_sample_size) 75 fmt.Printf("\n\t\tMean R: %f", 100.0*normalized_trump_share) 76 fmt.Printf("\n\t\tStd of mean R: %f", 100*std_error_poll_mean) 77 fmt.Printf("\n\t\tPoll says chance of R win: %f", p_republican_win) 78 } 79 return p_republican_win 80 } 81 82 func getChanceRepublicanWinFromPollPlusUncertainty(poll Poll, state State, pretty_print bool) float64 { 83 // Uncertainty from the state 84 n_republican_win := 0 85 for _, party := range state.PresidentialElectoralHistory { 86 if party == "R" { 87 n_republican_win++ 88 } 89 } 90 91 // Get the uncertainty from the poll 92 biden_percentage, biden_exists := poll.PollResults[dem_nominee_name] 93 trump_percentage, trump_exists := poll.PollResults[rep_nominee_name] 94 if !biden_exists || !trump_exists { 95 panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump") 96 } 97 biden_share := biden_percentage / 100.0 // will panic if the item is not found, but we've previously filtered for it 98 trump_share := trump_percentage / 100.0 99 100 normalized_trump_share := trump_share / (trump_share + biden_share) 101 normalized_biden_share := biden_share / (trump_share + biden_share) 102 103 joint_trump_biden_sample_size := (biden_share + trump_share) * float64(poll.SampleSize) 104 std_error_poll_mean := math.Sqrt((normalized_trump_share * normalized_biden_share) / joint_trump_biden_sample_size) 105 106 /* Inject additional uncertainty */ 107 /* 108 Possible factors: 109 - National drift between now and the election (biggest one) 110 - States more uncertain than the national average 111 - Idiosyncratic factors 112 - Polls not being as good as gallup 113 - Increased polarization 114 Also note that the polls already have some error already 115 */ 116 std_additional_uncertainty := core.Std_additional_uncertainty 117 118 if n_republican_win == 0 || n_republican_win == 6 { 119 // if solid states for the last 6 elections 120 std_additional_uncertainty = std_additional_uncertainty / 3.0 121 122 if pretty_print { 123 fmt.Printf("\n\t\tN republican wins: %d", n_republican_win) 124 fmt.Printf("\n\t\t=> Reducing additional uncertainty") 125 } 126 } 127 128 std_error := std_error_poll_mean + std_additional_uncertainty 129 130 // std_error := std_error_poll_mean + 0.065 131 p_republican_win := getProbabilityAboveX(0.5, normalized_trump_share, std_error) 132 133 if pretty_print { 134 fmt.Printf("\n\t\tStd with std_additional_uncertainty R: %f", 100*std_error) 135 fmt.Printf("\n\t\tPoll plus uncertainty says chance of R win: %f", p_republican_win) 136 } 137 return p_republican_win 138 } 139 140 /* Print state by state data */ 141 func printStates(states []State) { 142 for _, state := range states { 143 fmt.Printf("\n\nState: %s", state.Name) 144 fmt.Printf("\n\tVotes: %d", state.Votes) 145 fmt.Printf("\n\tHistory: %s", state.PresidentialElectoralHistory) 146 147 p_baserate_republican_win := 0.0 148 for _, party := range state.PresidentialElectoralHistory { 149 if party == "R" { 150 p_baserate_republican_win++ 151 } 152 } 153 fmt.Printf("\n\tHistorical base rate of R win: %f", p_baserate_republican_win/float64(len(state.PresidentialElectoralHistory))) 154 155 // Individual poll 156 for _, poll := range state.Polls { 157 fmt.Printf("\n\tPoll: %+v", poll) 158 _ = getChanceRepublicanWinFromPoll(poll, true) 159 _ = getChanceRepublicanWinFromPollPlusUncertainty(poll, state, true) 160 } 161 162 // Aggregate poll 163 num_biden_votes := 0.0 164 num_trump_votes := 0.0 165 for _, poll := range state.Polls { 166 biden_percentage, biden_exists := poll.PollResults[dem_nominee_name] 167 trump_percentage, trump_exists := poll.PollResults[rep_nominee_name] 168 if !biden_exists || !trump_exists { 169 panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump") 170 } 171 num_biden_votes += (biden_percentage / 100.0) * float64(poll.SampleSize) 172 num_trump_votes += (trump_percentage / 100.0) * float64(poll.SampleSize) 173 } 174 aggregate_sample_size := num_biden_votes + num_trump_votes 175 if aggregate_sample_size != 0.0 { 176 var aggregate_poll = Poll{SampleSize: int(aggregate_sample_size), PollResults: make(map[string]float64)} 177 aggregate_poll.PollResults[dem_nominee_name] = 100.0 * num_biden_votes / aggregate_sample_size 178 aggregate_poll.PollResults[rep_nominee_name] = 100.0 * num_trump_votes / aggregate_sample_size 179 180 fmt.Printf("\n\tAggregate poll: %+v", aggregate_poll) 181 _ = getChanceRepublicanWinFromPoll(aggregate_poll, true) 182 _ = getChanceRepublicanWinFromPollPlusUncertainty(aggregate_poll, state, true) 183 } 184 } 185 } 186 187 /* Sample state by state */ 188 func sampleFromState(state State) VotesForEachParty { 189 switch state.Name { 190 case "Nebraska": 191 /* 192 2000: R 193 2004: R 194 2008: Split, 1 D, 4 R 195 2012: R 196 2016: R 197 2020: Split, 1 D, 4 R 198 */ 199 p_split := 2.0 / 6.0 200 if r.Float64() < p_split { 201 return VotesForEachParty{Democrats: 1, Republicans: 4} 202 } else { 203 return VotesForEachParty{Democrats: 0, Republicans: 5} 204 } 205 case "Maine": 206 /* 207 2000: D 208 2004: D 209 2008: D 210 2012: D 211 2016: Split: 3 D, 1 R 212 2020: Split, 3 D, 1 R 213 */ 214 p_split := 2.0 / 6.0 215 if r.Float64() < p_split { 216 return VotesForEachParty{Democrats: 3, Republicans: 1} 217 } else { 218 return VotesForEachParty{Democrats: 1, Republicans: 0} 219 } 220 default: 221 { 222 /* Consider the base rate for the state */ 223 p_baserate_republican_win := 0.0 224 for _, party := range state.PresidentialElectoralHistory { 225 if party == "R" { 226 p_baserate_republican_win++ 227 } 228 } 229 p_baserate_republican_win = p_baserate_republican_win / float64(len(state.PresidentialElectoralHistory)) 230 p_republican_win := p_baserate_republican_win // if no polls 231 232 /* Consider polls */ 233 num_biden_votes := 0.0 234 num_trump_votes := 0.0 235 for _, poll := range state.Polls { 236 biden_percentage, biden_exists := poll.PollResults[dem_nominee_name] 237 trump_percentage, trump_exists := poll.PollResults[rep_nominee_name] 238 if !biden_exists || !trump_exists { 239 panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump") 240 } 241 num_biden_votes += (biden_percentage / 100.0) * float64(poll.SampleSize) 242 num_trump_votes += (trump_percentage / 100.0) * float64(poll.SampleSize) 243 } 244 245 aggregate_sample_size := num_biden_votes + num_trump_votes 246 if aggregate_sample_size != 0.0 { 247 var aggregate_poll = Poll{SampleSize: int(aggregate_sample_size), PollResults: make(map[string]float64)} 248 aggregate_poll.PollResults[dem_nominee_name] = 100.0 * num_biden_votes / aggregate_sample_size 249 aggregate_poll.PollResults[rep_nominee_name] = 100.0 * num_trump_votes / aggregate_sample_size 250 251 p_republican_win_aggregate_polls := getChanceRepublicanWinFromPollPlusUncertainty(aggregate_poll, state, false) 252 // p_republican_win_aggregate_polls = getChanceRepublicanWinFromPoll(aggregate_poll, false) 253 254 weight_polls := core.Weight_polls_vs_baserate // 0.75 255 p_republican_win = weight_polls*p_republican_win_aggregate_polls + (1.0-weight_polls)*p_baserate_republican_win 256 // p_republican_win = p_republican_win_aggregate_polls 257 } 258 259 if r.Float64() < p_republican_win { 260 return VotesForEachParty{Democrats: 0, Republicans: state.Votes} 261 } else { 262 return VotesForEachParty{Democrats: state.Votes, Republicans: 0} 263 } 264 } 265 } 266 } 267 268 /* Simulate election */ 269 func simulateElection(states []State) int { 270 republican_seats := 0 271 for _, state := range states { 272 election_sample := sampleFromState(state) 273 republican_seats += election_sample.Republicans 274 } 275 return republican_seats 276 } 277 278 /* Histogram */ 279 func barString(n int) string { 280 str := "" 281 for i := 0; i < n; i++ { 282 str += "█" 283 } 284 return str 285 } 286 287 func printElectoralCollegeHistogram(samples []int) { 288 histogram := [538]int{} 289 for _, sample := range samples { 290 histogram[sample]++ 291 } 292 293 max_count := 0 294 for _, count := range histogram { 295 if count > max_count { 296 max_count = count 297 } 298 } 299 300 cp := 0.0 301 for i, count := range histogram { 302 bar_length := (count * 100) / max_count // Assuming max_count bar length is 50 characters. 75. 303 p := float64(count) / float64(len(samples)) * 100 304 cp += p 305 306 if i > 130 && i < 538 { 307 fmt.Printf("[ %2d, %4d): %s %.2f%% (%.0f%%)\n", i, i+1, barString(bar_length), p, cp) 308 } else if p >= 0.01 { 309 fmt.Printf(">0.01 probability outside of domain, you might want to change histogram parameters\n") 310 311 } 312 } 313 } 314 315 /* Load data from csvs */ 316 // Glue code 317 func readStates() ([]State, error) { 318 var states map[string]State = make(map[string]State) 319 320 /* Electoral college votes for the 2024 election*/ 321 votes_file, err := os.Open("data/num-electors/electoral-college-votes.csv") 322 // votes_file, err := os.Open("data/electoral-college-votes-2010-census.csv") 323 if err != nil { 324 return nil, fmt.Errorf("error opening the votes file: %v", err) 325 } 326 defer votes_file.Close() 327 328 votes_reader := csv.NewReader(votes_file) 329 if _, err := votes_reader.Read(); err != nil { // Skip header 330 return nil, fmt.Errorf("error reading votes header: %v", err) 331 } 332 for { 333 csv_record, err := votes_reader.Read() 334 if err != nil { 335 break // EOF or an error 336 } 337 votes, err := strconv.Atoi(csv_record[1]) 338 if err != nil { 339 continue // Error in converting votes, skip this record 340 } 341 state := csv_record[0] 342 if _, exists := states[state]; !exists { 343 states[state] = State{Name: state, Votes: votes, PresidentialElectoralHistory: make(map[string]string)} 344 } 345 } 346 347 /* Election results */ 348 var years = []string{"2000", "2004", "2008", "2012", "2016", "2020"} 349 for _, year := range years { 350 electoral_history_filename := fmt.Sprintf("data/electoral-history/%s.csv", year) 351 electoral_history_file, err := os.Open(electoral_history_filename) 352 if err != nil { 353 return nil, fmt.Errorf("error opening the electoral_history file for %s: %v", year, err) 354 } 355 electoral_history_reader := csv.NewReader(electoral_history_file) 356 if _, err := electoral_history_reader.Read(); err != nil { // Skip header 357 return nil, fmt.Errorf("error reading electoral_history header for %s: %v", year, err) 358 } 359 for { 360 record, err := electoral_history_reader.Read() 361 if err != nil { 362 break // EOF or an error 363 } 364 state, party := record[0], record[1] 365 data, exists := states[state] 366 if !exists { 367 continue // State not found in votes map, skip 368 } 369 // Update the party winning in the specific year 370 data.PresidentialElectoralHistory[year] = party 371 states[state] = data 372 } 373 374 electoral_history_file.Close() 375 } 376 377 /* Read polls */ 378 polls_file, err := os.Open("data/polls/president_polls_state.csv") // Make sure to update this path 379 if err != nil { 380 return nil, fmt.Errorf("error opening the polls file: %v", err) 381 } 382 defer polls_file.Close() 383 384 // Using a temporary map to group poll results by state and poll ID 385 state_polls_map := make(map[string]map[string]Poll) 386 387 polls_reader := csv.NewReader(polls_file) 388 _, err = polls_reader.Read() // Skip the header 389 if err != nil { 390 return nil, fmt.Errorf("error reading polls header: %v", err) 391 } 392 393 for { 394 record, err := polls_reader.Read() 395 if err != nil { 396 break // EOF or an error 397 } 398 399 poll_id := record[0] 400 state_name := record[12] 401 end_date := record[14] 402 partisan := record[32] 403 candidate_name := record[44] 404 405 date_layout := "1/2/06" 406 parsed_date, err := time.Parse(date_layout, end_date) 407 if err != nil { 408 fmt.Println("Error parsing date: ", err) 409 } 410 411 sample_size, err := strconv.Atoi(record[22]) 412 if err != nil { 413 continue // If error, skip this record 414 } 415 416 percentage, err := strconv.ParseFloat(record[47], 64) // percentage is in the 42nd column 417 if err != nil { 418 fmt.Printf("Error parsing percentage") 419 continue // If error, skip this record 420 } 421 422 if _, exists := state_polls_map[state_name]; !exists { 423 state_polls_map[state_name] = make(map[string]Poll) 424 } 425 426 poll, exists := state_polls_map[state_name][poll_id] 427 if !exists { 428 poll = Poll{ 429 PollId: poll_id, 430 SampleSize: sample_size, 431 PollResults: make(map[string]float64), 432 Date: parsed_date, 433 Partisan: partisan, 434 } 435 } 436 poll.PollResults[candidate_name] = percentage 437 state_polls_map[state_name][poll_id] = poll 438 } 439 440 // Add the aggregated poll data to the respective states 441 for state_name, polls := range state_polls_map { 442 443 // Filter polls by recency and by having both Biden and Trump 444 var recent_polls []Poll 445 for _, poll := range polls { 446 if poll.Date.After(time.Now().AddDate(0, 0, -core.Include_polls_within_n_days)) { 447 recent_polls = append(recent_polls, poll) 448 } 449 } 450 var recent_biden_trump_polls []Poll 451 for _, recent_poll := range recent_polls { 452 has_biden := false 453 has_trump := false 454 for candidate_name, _ := range recent_poll.PollResults { 455 if candidate_name == dem_nominee_name { 456 has_biden = true 457 } else if candidate_name == rep_nominee_name { 458 has_trump = true 459 } 460 } 461 if has_biden && has_trump { 462 recent_biden_trump_polls = append(recent_biden_trump_polls, recent_poll) 463 } 464 } 465 466 if state, exists := states[state_name]; exists { 467 state.Polls = recent_biden_trump_polls 468 states[state_name] = state // Not redundant 469 } else { 470 // fmt.Printf("Encountered new state: %s\n", state_name) 471 /* 472 states[state_name] = State{ 473 Name: state_name, 474 Polls: polls_slice, 475 } 476 */ 477 } 478 } 479 480 // Convert statesData map to a slice for returning 481 var states_slice []State 482 for _, state := range states { 483 states_slice = append(states_slice, state) 484 } 485 return states_slice, nil 486 } 487 488 func main() { 489 states, err := readStates() 490 if err != nil { 491 fmt.Println("Error:", err) 492 return 493 } 494 495 n_sims := 1_000_000 496 497 printStates(states) 498 fmt.Printf("\n\n") 499 500 p_republicans := 0.0 501 results := make([]int, n_sims) 502 for i := 0; i < n_sims; i++ { 503 republican_seats := simulateElection(states) 504 results[i] = republican_seats 505 if republican_seats >= 270 { 506 p_republicans++ 507 } 508 } 509 printElectoralCollegeHistogram(results) 510 511 p_republicans = p_republicans / float64(n_sims) 512 fmt.Printf("\n%% republicans: %f\n", p_republicans) 513 }