main.go - 2024-election-modelling - A walking stick to Nate Silver's sportscar

main.go (16111B)
      1 package main
      2 
      3 import (
      4 	"encoding/csv"
      5 	"fmt"
      6 	"git.nunosempere.com/NunoSempere/US-2024/core"
      7 	"math"
      8 	rand "math/rand/v2"
      9 	"os"
     10 	"strconv"
     11 	"time"
     12 	// "strings"
     13 )
     14 
     15 /* Structs */
// State bundles everything the model knows about one state.
type State struct {
	// Name is the state's name; readStates uses it as the key that joins the
	// votes, electoral-history and polls CSV files.
	Name                         string
	// Votes is the number of electoral college votes read from the votes CSV.
	Votes                        int
	// PresidentialElectoralHistory maps an election year (e.g. "2020") to the
	// party string recorded for that year; this file only tests for "R".
	PresidentialElectoralHistory map[string]string
	// Polls holds the polls attached to the state by readStates.
	Polls                        []Poll
}
     22 
     23 type VotesForEachParty struct {
     24 	Democrats   int
     25 	Republicans int
     26 }
     27 
// Poll is one poll (or an aggregate of several) for a single state.
type Poll struct {
	// PollId is the identifier readStates takes from the polls CSV; rows that
	// share it are merged into one Poll.
	PollId      string
	// SampleSize is the number of respondents.
	SampleSize  int
	// PollResults maps a candidate name to that candidate's percentage; the
	// consumers in this file divide it by 100 to obtain a share.
	PollResults map[string]float64
	// Date is the poll's end date as parsed by readStates.
	Date        time.Time
	// Partisan is copied verbatim from the polls CSV; it is not interpreted
	// anywhere in this file.
	Partisan    string
}
     35 
     36 /* Globals */
     37 var r = rand.New(rand.NewPCG(uint64(100), uint64(2224)))
     38 var dem_nominee_name = "Biden"
     39 var rep_nominee_name = "Trump"
     40 
     41 /* Sampling helper functions */
     42 func getNormalCDF(x float64, mean float64, std float64) float64 {
     43 	erf_term := (x - mean) / (std * math.Sqrt2)
     44 	return (1 + math.Erf(erf_term)) / 2
     45 }
     46 
     47 func getProbabilityAboveX(x float64, mean float64, std float64) float64 {
     48 	return 1 - getNormalCDF(x, mean, std)
     49 }
     50 
     51 func getChanceCandidateWinsFromPollShare(candidate_p float64, poll_sample_size float64) float64 {
     52 	std := math.Sqrt(candidate_p * (1 - candidate_p) / poll_sample_size) // https://stats.stackexchange.com/questions/258879/how-to-interpret-margin-of-error-in-a-non-binary-poll
     53 	return getProbabilityAboveX(0.5, candidate_p, std)
     54 }
     55 
     56 func getChanceRepublicanWinFromPoll(poll Poll, pretty_print bool) float64 {
     57 	biden_percentage, biden_exists := poll.PollResults[dem_nominee_name]
     58 	trump_percentage, trump_exists := poll.PollResults[rep_nominee_name]
     59 	if !biden_exists || !trump_exists {
     60 		panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
     61 	}
     62 	biden_share := biden_percentage / 100.0 // will panic if the item is not found, but we've previously filtered for it
     63 	trump_share := trump_percentage / 100.0
     64 
     65 	normalized_trump_share := trump_share / (trump_share + biden_share)
     66 	normalized_biden_share := biden_share / (trump_share + biden_share)
     67 
     68 	joint_trump_biden_sample_size := (biden_share + trump_share) * float64(poll.SampleSize)
     69 	std_error_poll_mean := math.Sqrt((normalized_trump_share * normalized_biden_share) / joint_trump_biden_sample_size)
     70 
     71 	p_republican_win := getProbabilityAboveX(0.5, normalized_trump_share, std_error_poll_mean)
     72 
     73 	if pretty_print {
     74 		fmt.Printf("\n\t\tSample size: %f", joint_trump_biden_sample_size)
     75 		fmt.Printf("\n\t\tMean R: %f", 100.0*normalized_trump_share)
     76 		fmt.Printf("\n\t\tStd of mean R: %f", 100*std_error_poll_mean)
     77 		fmt.Printf("\n\t\tPoll says chance of R win: %f", p_republican_win)
     78 	}
     79 	return p_republican_win
     80 }
     81 
     82 func getChanceRepublicanWinFromPollPlusUncertainty(poll Poll, state State, pretty_print bool) float64 {
     83 	// Uncertainty from the state
     84 	n_republican_win := 0
     85 	for _, party := range state.PresidentialElectoralHistory {
     86 		if party == "R" {
     87 			n_republican_win++
     88 		}
     89 	}
     90 
     91 	// Get the uncertainty from the poll
     92 	biden_percentage, biden_exists := poll.PollResults[dem_nominee_name]
     93 	trump_percentage, trump_exists := poll.PollResults[rep_nominee_name]
     94 	if !biden_exists || !trump_exists {
     95 		panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
     96 	}
     97 	biden_share := biden_percentage / 100.0 // will panic if the item is not found, but we've previously filtered for it
     98 	trump_share := trump_percentage / 100.0
     99 
    100 	normalized_trump_share := trump_share / (trump_share + biden_share)
    101 	normalized_biden_share := biden_share / (trump_share + biden_share)
    102 
    103 	joint_trump_biden_sample_size := (biden_share + trump_share) * float64(poll.SampleSize)
    104 	std_error_poll_mean := math.Sqrt((normalized_trump_share * normalized_biden_share) / joint_trump_biden_sample_size)
    105 
    106 	/* Inject additional uncertainty */
    107 	/*
    108 		Possible factors:
    109 			- National drift between now and the election (biggest one)
    110 			- States more uncertain than the national average
    111 			- Idiosyncratic factors
    112 			- Polls not being as good as gallup
    113 			- Increased polarization
    114 		Also note that the polls already have some error already
    115 	*/
    116 	std_additional_uncertainty := core.Std_additional_uncertainty
    117 
    118 	if n_republican_win == 0 || n_republican_win == 6 {
    119 		// if solid states for the last 6 elections
    120 		std_additional_uncertainty = std_additional_uncertainty / 3.0
    121 
    122 		if pretty_print {
    123 			fmt.Printf("\n\t\tN republican wins: %d", n_republican_win)
    124 			fmt.Printf("\n\t\t=> Reducing additional uncertainty")
    125 		}
    126 	}
    127 
    128 	std_error := std_error_poll_mean + std_additional_uncertainty
    129 
    130 	// std_error := std_error_poll_mean + 0.065
    131 	p_republican_win := getProbabilityAboveX(0.5, normalized_trump_share, std_error)
    132 
    133 	if pretty_print {
    134 		fmt.Printf("\n\t\tStd with std_additional_uncertainty R: %f", 100*std_error)
    135 		fmt.Printf("\n\t\tPoll plus uncertainty says chance of R win: %f", p_republican_win)
    136 	}
    137 	return p_republican_win
    138 }
    139 
    140 /* Print state by state data */
    141 func printStates(states []State) {
    142 	for _, state := range states {
    143 		fmt.Printf("\n\nState: %s", state.Name)
    144 		fmt.Printf("\n\tVotes: %d", state.Votes)
    145 		fmt.Printf("\n\tHistory: %s", state.PresidentialElectoralHistory)
    146 
    147 		p_baserate_republican_win := 0.0
    148 		for _, party := range state.PresidentialElectoralHistory {
    149 			if party == "R" {
    150 				p_baserate_republican_win++
    151 			}
    152 		}
    153 		fmt.Printf("\n\tHistorical base rate of R win: %f", p_baserate_republican_win/float64(len(state.PresidentialElectoralHistory)))
    154 
    155 		// Individual poll
    156 		for _, poll := range state.Polls {
    157 			fmt.Printf("\n\tPoll: %+v", poll)
    158 			_ = getChanceRepublicanWinFromPoll(poll, true)
    159 			_ = getChanceRepublicanWinFromPollPlusUncertainty(poll, state, true)
    160 		}
    161 
    162 		// Aggregate poll
    163 		num_biden_votes := 0.0
    164 		num_trump_votes := 0.0
    165 		for _, poll := range state.Polls {
    166 			biden_percentage, biden_exists := poll.PollResults[dem_nominee_name]
    167 			trump_percentage, trump_exists := poll.PollResults[rep_nominee_name]
    168 			if !biden_exists || !trump_exists {
    169 				panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
    170 			}
    171 			num_biden_votes += (biden_percentage / 100.0) * float64(poll.SampleSize)
    172 			num_trump_votes += (trump_percentage / 100.0) * float64(poll.SampleSize)
    173 		}
    174 		aggregate_sample_size := num_biden_votes + num_trump_votes
    175 		if aggregate_sample_size != 0.0 {
    176 			var aggregate_poll = Poll{SampleSize: int(aggregate_sample_size), PollResults: make(map[string]float64)}
    177 			aggregate_poll.PollResults[dem_nominee_name] = 100.0 * num_biden_votes / aggregate_sample_size
    178 			aggregate_poll.PollResults[rep_nominee_name] = 100.0 * num_trump_votes / aggregate_sample_size
    179 
    180 			fmt.Printf("\n\tAggregate poll: %+v", aggregate_poll)
    181 			_ = getChanceRepublicanWinFromPoll(aggregate_poll, true)
    182 			_ = getChanceRepublicanWinFromPollPlusUncertainty(aggregate_poll, state, true)
    183 		}
    184 	}
    185 }
    186 
    187 /* Sample state by state */
    188 func sampleFromState(state State) VotesForEachParty {
    189 	switch state.Name {
    190 	case "Nebraska":
    191 		/*
    192 			2000: R
    193 			2004: R
    194 			2008: Split, 1 D, 4 R
    195 			2012: R
    196 			2016: R
    197 			2020: Split, 1 D, 4 R
    198 		*/
    199 		p_split := 2.0 / 6.0
    200 		if r.Float64() < p_split {
    201 			return VotesForEachParty{Democrats: 1, Republicans: 4}
    202 		} else {
    203 			return VotesForEachParty{Democrats: 0, Republicans: 5}
    204 		}
    205 	case "Maine":
    206 		/*
    207 			2000: D
    208 			2004: D
    209 			2008: D
    210 			2012: D
    211 			2016: Split: 3 D, 1 R
    212 			2020: Split, 3 D, 1 R
    213 		*/
    214 		p_split := 2.0 / 6.0
    215 		if r.Float64() < p_split {
    216 			return VotesForEachParty{Democrats: 3, Republicans: 1}
    217 		} else {
    218 			return VotesForEachParty{Democrats: 1, Republicans: 0}
    219 		}
    220 	default:
    221 		{
    222 			/* Consider the base rate for the state */
    223 			p_baserate_republican_win := 0.0
    224 			for _, party := range state.PresidentialElectoralHistory {
    225 				if party == "R" {
    226 					p_baserate_republican_win++
    227 				}
    228 			}
    229 			p_baserate_republican_win = p_baserate_republican_win / float64(len(state.PresidentialElectoralHistory))
    230 			p_republican_win := p_baserate_republican_win // if no polls
    231 
    232 			/* Consider polls */
    233 			num_biden_votes := 0.0
    234 			num_trump_votes := 0.0
    235 			for _, poll := range state.Polls {
    236 				biden_percentage, biden_exists := poll.PollResults[dem_nominee_name]
    237 				trump_percentage, trump_exists := poll.PollResults[rep_nominee_name]
    238 				if !biden_exists || !trump_exists {
    239 					panic("PollResults of poll filtered to have Biden/Trump doesn't have Biden/Trump")
    240 				}
    241 				num_biden_votes += (biden_percentage / 100.0) * float64(poll.SampleSize)
    242 				num_trump_votes += (trump_percentage / 100.0) * float64(poll.SampleSize)
    243 			}
    244 
    245 			aggregate_sample_size := num_biden_votes + num_trump_votes
    246 			if aggregate_sample_size != 0.0 {
    247 				var aggregate_poll = Poll{SampleSize: int(aggregate_sample_size), PollResults: make(map[string]float64)}
    248 				aggregate_poll.PollResults[dem_nominee_name] = 100.0 * num_biden_votes / aggregate_sample_size
    249 				aggregate_poll.PollResults[rep_nominee_name] = 100.0 * num_trump_votes / aggregate_sample_size
    250 
    251 				p_republican_win_aggregate_polls := getChanceRepublicanWinFromPollPlusUncertainty(aggregate_poll, state, false)
    252 				// p_republican_win_aggregate_polls = getChanceRepublicanWinFromPoll(aggregate_poll, false)
    253 
    254 				weight_polls := core.Weight_polls_vs_baserate // 0.75
    255 				p_republican_win = weight_polls*p_republican_win_aggregate_polls + (1.0-weight_polls)*p_baserate_republican_win
    256 				// p_republican_win = p_republican_win_aggregate_polls
    257 			}
    258 
    259 			if r.Float64() < p_republican_win {
    260 				return VotesForEachParty{Democrats: 0, Republicans: state.Votes}
    261 			} else {
    262 				return VotesForEachParty{Democrats: state.Votes, Republicans: 0}
    263 			}
    264 		}
    265 	}
    266 }
    267 
    268 /* Simulate election */
    269 func simulateElection(states []State) int {
    270 	republican_seats := 0
    271 	for _, state := range states {
    272 		election_sample := sampleFromState(state)
    273 		republican_seats += election_sample.Republicans
    274 	}
    275 	return republican_seats
    276 }
    277 
    278 /* Histogram */
    279 func barString(n int) string {
    280 	str := ""
    281 	for i := 0; i < n; i++ {
    282 		str += "█"
    283 	}
    284 	return str
    285 }
    286 
    287 func printElectoralCollegeHistogram(samples []int) {
    288 	histogram := [538]int{}
    289 	for _, sample := range samples {
    290 		histogram[sample]++
    291 	}
    292 
    293 	max_count := 0
    294 	for _, count := range histogram {
    295 		if count > max_count {
    296 			max_count = count
    297 		}
    298 	}
    299 
    300 	cp := 0.0
    301 	for i, count := range histogram {
    302 		bar_length := (count * 100) / max_count // Assuming max_count bar length is 50 characters. 75.
    303 		p := float64(count) / float64(len(samples)) * 100
    304 		cp += p
    305 
    306 		if i > 130 && i < 538 {
    307 			fmt.Printf("[ %2d, %4d): %s %.2f%% (%.0f%%)\n", i, i+1, barString(bar_length), p, cp)
    308 		} else if p >= 0.01 {
    309 			fmt.Printf(">0.01 probability outside of domain, you might want to change histogram parameters\n")
    310 
    311 		}
    312 	}
    313 }
    314 
    315 /* Load data from csvs */
    316 // Glue code
    317 func readStates() ([]State, error) {
    318 	var states map[string]State = make(map[string]State)
    319 
    320 	/* Electoral college votes for the 2024 election*/
    321 	votes_file, err := os.Open("data/num-electors/electoral-college-votes.csv")
    322 	// votes_file, err := os.Open("data/electoral-college-votes-2010-census.csv")
    323 	if err != nil {
    324 		return nil, fmt.Errorf("error opening the votes file: %v", err)
    325 	}
    326 	defer votes_file.Close()
    327 
    328 	votes_reader := csv.NewReader(votes_file)
    329 	if _, err := votes_reader.Read(); err != nil { // Skip header
    330 		return nil, fmt.Errorf("error reading votes header: %v", err)
    331 	}
    332 	for {
    333 		csv_record, err := votes_reader.Read()
    334 		if err != nil {
    335 			break // EOF or an error
    336 		}
    337 		votes, err := strconv.Atoi(csv_record[1])
    338 		if err != nil {
    339 			continue // Error in converting votes, skip this record
    340 		}
    341 		state := csv_record[0]
    342 		if _, exists := states[state]; !exists {
    343 			states[state] = State{Name: state, Votes: votes, PresidentialElectoralHistory: make(map[string]string)}
    344 		}
    345 	}
    346 
    347 	/* Election results */
    348 	var years = []string{"2000", "2004", "2008", "2012", "2016", "2020"}
    349 	for _, year := range years {
    350 		electoral_history_filename := fmt.Sprintf("data/electoral-history/%s.csv", year)
    351 		electoral_history_file, err := os.Open(electoral_history_filename)
    352 		if err != nil {
    353 			return nil, fmt.Errorf("error opening the electoral_history file for %s: %v", year, err)
    354 		}
    355 		electoral_history_reader := csv.NewReader(electoral_history_file)
    356 		if _, err := electoral_history_reader.Read(); err != nil { // Skip header
    357 			return nil, fmt.Errorf("error reading electoral_history header for %s: %v", year, err)
    358 		}
    359 		for {
    360 			record, err := electoral_history_reader.Read()
    361 			if err != nil {
    362 				break // EOF or an error
    363 			}
    364 			state, party := record[0], record[1]
    365 			data, exists := states[state]
    366 			if !exists {
    367 				continue // State not found in votes map, skip
    368 			}
    369 			// Update the party winning in the specific year
    370 			data.PresidentialElectoralHistory[year] = party
    371 			states[state] = data
    372 		}
    373 
    374 		electoral_history_file.Close()
    375 	}
    376 
    377 	/* Read polls */
    378 	polls_file, err := os.Open("data/polls/president_polls_state.csv") // Make sure to update this path
    379 	if err != nil {
    380 		return nil, fmt.Errorf("error opening the polls file: %v", err)
    381 	}
    382 	defer polls_file.Close()
    383 
    384 	// Using a temporary map to group poll results by state and poll ID
    385 	state_polls_map := make(map[string]map[string]Poll)
    386 
    387 	polls_reader := csv.NewReader(polls_file)
    388 	_, err = polls_reader.Read() // Skip the header
    389 	if err != nil {
    390 		return nil, fmt.Errorf("error reading polls header: %v", err)
    391 	}
    392 
    393 	for {
    394 		record, err := polls_reader.Read()
    395 		if err != nil {
    396 			break // EOF or an error
    397 		}
    398 
    399 		poll_id := record[0]
    400 		state_name := record[12]
    401 		end_date := record[14]
    402 		partisan := record[32]
    403 		candidate_name := record[44]
    404 
    405 		date_layout := "1/2/06"
    406 		parsed_date, err := time.Parse(date_layout, end_date)
    407 		if err != nil {
    408 			fmt.Println("Error parsing date: ", err)
    409 		}
    410 
    411 		sample_size, err := strconv.Atoi(record[22])
    412 		if err != nil {
    413 			continue // If error, skip this record
    414 		}
    415 
    416 		percentage, err := strconv.ParseFloat(record[47], 64) // percentage is in the 42nd column
    417 		if err != nil {
    418 			fmt.Printf("Error parsing percentage")
    419 			continue // If error, skip this record
    420 		}
    421 
    422 		if _, exists := state_polls_map[state_name]; !exists {
    423 			state_polls_map[state_name] = make(map[string]Poll)
    424 		}
    425 
    426 		poll, exists := state_polls_map[state_name][poll_id]
    427 		if !exists {
    428 			poll = Poll{
    429 				PollId:      poll_id,
    430 				SampleSize:  sample_size,
    431 				PollResults: make(map[string]float64),
    432 				Date:        parsed_date,
    433 				Partisan:    partisan,
    434 			}
    435 		}
    436 		poll.PollResults[candidate_name] = percentage
    437 		state_polls_map[state_name][poll_id] = poll
    438 	}
    439 
    440 	// Add the aggregated poll data to the respective states
    441 	for state_name, polls := range state_polls_map {
    442 
    443 		// Filter polls by recency and by having both Biden and Trump
    444 		var recent_polls []Poll
    445 		for _, poll := range polls {
    446 			if poll.Date.After(time.Now().AddDate(0, 0, -core.Include_polls_within_n_days)) {
    447 				recent_polls = append(recent_polls, poll)
    448 			}
    449 		}
    450 		var recent_biden_trump_polls []Poll
    451 		for _, recent_poll := range recent_polls {
    452 			has_biden := false
    453 			has_trump := false
    454 			for candidate_name, _ := range recent_poll.PollResults {
    455 				if candidate_name == dem_nominee_name {
    456 					has_biden = true
    457 				} else if candidate_name == rep_nominee_name {
    458 					has_trump = true
    459 				}
    460 			}
    461 			if has_biden && has_trump {
    462 				recent_biden_trump_polls = append(recent_biden_trump_polls, recent_poll)
    463 			}
    464 		}
    465 
    466 		if state, exists := states[state_name]; exists {
    467 			state.Polls = recent_biden_trump_polls
    468 			states[state_name] = state // Not redundant
    469 		} else {
    470 			// fmt.Printf("Encountered new state: %s\n", state_name)
    471 			/*
    472 				states[state_name] = State{
    473 					Name:  state_name,
    474 					Polls: polls_slice,
    475 				}
    476 			*/
    477 		}
    478 	}
    479 
    480 	// Convert statesData map to a slice for returning
    481 	var states_slice []State
    482 	for _, state := range states {
    483 		states_slice = append(states_slice, state)
    484 	}
    485 	return states_slice, nil
    486 }
    487 
    488 func main() {
    489 	states, err := readStates()
    490 	if err != nil {
    491 		fmt.Println("Error:", err)
    492 		return
    493 	}
    494 
    495 	n_sims := 1_000_000
    496 
    497 	printStates(states)
    498 	fmt.Printf("\n\n")
    499 
    500 	p_republicans := 0.0
    501 	results := make([]int, n_sims)
    502 	for i := 0; i < n_sims; i++ {
    503 		republican_seats := simulateElection(states)
    504 		results[i] = republican_seats
    505 		if republican_seats >= 270 {
    506 			p_republicans++
    507 		}
    508 	}
    509 	printElectoralCollegeHistogram(results)
    510 
    511 	p_republicans = p_republicans / float64(n_sims)
    512 	fmt.Printf("\n%% republicans: %f\n", p_republicans)
    513 }
	2024-election-modelling A walking stick to Nate Silver's sportscar
	Log \| Files \| Refs \| README