commit e25942b2c30eb1b4dee31049a7f9c42dc09b074a
parent e6374cbc82cd30f9ca812e00e542298fd2e94e30
Author: NunoSempere <nuno.semperelh@protonmail.com>
Date: Sat, 13 Apr 2024 22:14:59 -0400
tweak polls in main and restructure README
Diffstat:
| M | README.md | | | 61 | ++++++++++++++++++++++++++++++++++++++++++------------------- |
| A | data/polls/fields.sh | | | 2 | ++ |
| M | main.go | | | 84 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- |
3 files changed, 127 insertions(+), 20 deletions(-)
diff --git a/README.md b/README.md
@@ -1,19 +1,42 @@
-Electoral votes/election
-
-Year | Republican electoral college votes
-2000 | 271
-2004 | 286
-2008 | 173
-2012 | 206
-2016 | 304
-2020 | 232
-
-Year | Democrat electoral college votes
-2000 | 266
-2004 | 251
-2008 | 365
-2012 | 332
-2016 | 227
-2020 | 232
-
-Ok, so because Obama really trounced the opposition, it won by more than the Republicans won, which means that the baserates approach didn't really work.
+# Nuño's dead simple election simulator
+
+## First round: just consider the base rates.
+
+- [x] Get past electoral college results since 2000
+- [x] Get number of electors for each state with the new census
+- [x] Combine the two to get an initial base rates analysis
+
+This initial approach gives Republicans a 25% chance of winning the 2024 election. Why is this? Well, consider the number of electoral college votes:
+
+| Year | Republican electoral college votes |
+| ---- | --- |
+| 2000 | 271 |
+| 2004 | 286 |
+| 2008 | 173 |
+| 2012 | 206 |
+| 2016 | 304 |
+| 2020 | 232 |
+
+| Year | Democrat electoral college votes |
+| ---- | --- |
+| 2000 | 266 |
+| 2004 | 251 |
+| 2008 | 365 |
+| 2012 | 332 |
+| 2016 | 227 |
+| 2020 | 306 |
+
+When Democrats won with Obama, they won by a lot, whereas when Republicans won with Bush and Trump, they won by a smaller amount. Or, in other words, this initial approach *doesn't take into account that states are correlated*.
+
+Remedy: consider the conditional probabilities? But how? Or, relax assumptions using Laplace's law?
+
+## Second round: just consider polls
+
+- [ ] Download and format
+- [ ] Read
+- [ ] Consider what the standard error should be
+- [ ] Aggregate polls?
+- [ ] Exclude polls older than one month?
+- [ ] ...
+
+## Third
diff --git a/data/polls/fields.sh b/data/polls/fields.sh
@@ -0,0 +1,2 @@
+
+cat president_polls_state.csv | head -n 2 | cut -d, -f13 # print the 13th comma-separated field of the first two lines (header + first row); NOTE(review): cut splits on every comma, so quoted CSV fields containing commas would shift the result
diff --git a/main.go b/main.go
@@ -14,6 +14,7 @@ type State struct {
Name string
Votes int
VictoriousPartyPerElection map[string]string
+ Polls []Poll
}
type VotesForEachParty struct {
@@ -21,6 +22,12 @@ type VotesForEachParty struct {
Republicans int
}
+type Poll struct { // Poll groups the per-candidate results that share one poll ID within a state
+ PollId string // poll identifier, taken from column index 0 of the polls CSV
+ SampleSize int // sample size parsed as an int from column index 22; set from the first row seen for this poll
+ PollResults map[string]float64 // candidate name (column index 44) -> pct value (column index 47)
+}
+
// type src = *rand.Rand
/* Globals */
@@ -87,6 +94,79 @@ func readStates() ([]State, error) {
results_file.Close()
}
+ /* Read polls */
+ polls_file, err := os.Open("data/polls/president_polls_state.csv") // Make sure to update this path
+ if err != nil {
+ return nil, fmt.Errorf("error opening the polls file: %v", err)
+ }
+ defer polls_file.Close()
+
+ // Using a temporary map to group poll results by state and poll ID
+ tmp_polls := make(map[string]map[string]Poll)
+
+ polls_reader := csv.NewReader(polls_file)
+ _, err = polls_reader.Read() // Skip the header
+ if err != nil {
+ return nil, fmt.Errorf("error reading polls header: %v", err)
+ }
+
+ for {
+ record, err := polls_reader.Read()
+ if err != nil {
+ break // EOF or an error
+ }
+ state_name := record[12]
+ // fmt.Printf("State: %s", state_name)
+ poll_id := record[0]
+
+ sampleSize, err := strconv.Atoi(record[22])
+ if err != nil {
+ continue // If error, skip this record
+ }
+
+ candidateName := record[44]
+ pct, err := strconv.ParseFloat(record[47], 64) // pct is read from index 47 (the 48th column)
+ if err != nil {
+ fmt.Printf("Error parsing percentage")
+ continue // If error, skip this record
+ }
+
+ if _, exists := tmp_polls[state_name]; !exists {
+ tmp_polls[state_name] = make(map[string]Poll)
+ }
+
+ poll, exists := tmp_polls[state_name][poll_id]
+ if !exists {
+ poll = Poll{
+ PollId: poll_id,
+ SampleSize: sampleSize,
+ PollResults: make(map[string]float64),
+ }
+ }
+ poll.PollResults[candidateName] = pct
+ tmp_polls[state_name][poll_id] = poll
+ }
+
+ // Add the aggregated poll data to the respective states
+ for state_name, polls := range tmp_polls {
+ var pollsSlice []Poll
+ for _, poll := range polls {
+ pollsSlice = append(pollsSlice, poll)
+ }
+ if state, exists := states[state_name]; exists {
+ state.Polls = pollsSlice
+ states[state_name] = state // Not redundant
+ } else {
+ // fmt.Printf("Encountered new state: %s\n", state_name)
+ /*
+ states[state_name] = State{
+ Name: state_name,
+ Polls: pollsSlice,
+ }
+ */
+ }
+ }
+
// Convert statesData map to a slice for returning
var states_slice []State
for _, state := range states {
@@ -97,6 +177,7 @@ func readStates() ([]State, error) {
}
func sampleFromState(state State) VotesForEachParty {
+ fmt.Printf("%s\n\n", state)
switch state.Name {
case "Nebraska":
// 2000: R
@@ -146,6 +227,7 @@ func simulateElection(states []State) int {
republican_seats := 0
for _, state := range states {
+ // fmt.Printf("%s\n", state)
election_sample := sampleFromState(state)
republican_seats += election_sample.Republicans
/*
@@ -176,7 +258,7 @@ func main() {
return
}
- n_sims := 100_000
+ n_sims := 1
p_republicans := 0.0
for i := 0; i < n_sims; i++ {