# NOTE: the former `rm(list = ls())` call was dropped on purpose. It deletes
# every object in the caller's global environment when the script is sourced,
# yet does not detach packages or reset options, so it never produced a truly
# clean session. Restart R instead when a fresh session is needed.

# Base-R import of the 2009 Iranian presidential election results. The CSV is
# resolved relative to the current working directory.
Iran2009Election <- read.csv("Election2009Iran.csv")

# Preview the first rows to check how the columns were parsed.
head(Iran2009Election)
## Region Ahmadinejad X. Rezai X..1 Karrubi X..2 Mousavi X..3
## 1 East Azerbaijan 1,131,111 56.75 16,920 0.85 7,246 0.36 837,858 42.04
## 2 West Azerbaijan 623,946 47.48 12,199 0.93 21,609 1.64 656,508 49.95
## 3 Ardabil 325,911 51.11 6,578 1.03 2,319 0.36 302,825 47.49
## 4 Isfahan 1,799,255 68.88 51,788 1.98 14,579 0.56 746,697 28.58
## 5 Ilam 199,654 64.58 5,221 1.69 7,471 2.42 96,826 31.32
## 6 Bushehr 299,357 61.37 7,608 1.56 3,563 0.73 177,268 36.34
## Total.votes Invalid.votes Valid.votes Eligible.voters Turnout...
## 1 2,010,340 17,205 1,993,135 2,461,553 80.97
## 2 1,334,356 20,094 1,314,262 1,883,144 69.79
## 3 642,005 4,372 637,633 804,881 79.22
## 4 2,637,482 25,163 2,612,319 2,987,946 87.43
## 5 312,667 3,495 309,172 357,687 86.44
## 6 493,989 6,193 487,796 580,822 83.98
# Show the last six rows to confirm the file has no trailing summary/junk rows.
tail(Iran2009Election, n = 6L)
## Region Ahmadinejad X. Rezai X..1 Karrubi X..2 Mousavi X..3
## 25 Lorestan 677,829 70.91 14,920 1.56 44,036 4.61 219,156 22.93
## 26 Mazandaran 1,289,257 67.70 19,587 1.03 10,050 0.53 585,373 30.74
## 27 Markazi 572,988 73.64 10,057 1.29 4,675 0.60 190,349 24.46
## 28 Hormozgan 482,990 65.50 7,237 0.98 5,126 0.70 241,988 32.82
## 29 Hamadan 765,723 75.86 13,117 1.30 12,032 1.19 218,481 21.65
## 30 Yazd 337,178 55.83 8,406 1.39 2,565 0.42 255,799 42.35
## Total.votes Invalid.votes Valid.votes Eligible.voters Turnout...
## 25 964,270 8,329 955,941 1,124,940 84.98
## 26 1,919,838 15,571 1,904,267 1,915,240 99.43
## 27 785,961 7,892 778,069 885,557 87.86
## 28 743,024 5,683 737,341 919,908 80.15
## 29 1,019,169 9,816 1,009,353 1,256,250 80.35
## 30 609,856 5,908 603,948 609,341 99.11
# Compact structure summary: reveals the type each column was parsed as.
utils::str(Iran2009Election)
## 'data.frame': 30 obs. of 14 variables:
## $ Region : chr "East Azerbaijan" "West Azerbaijan" "Ardabil" "Isfahan" ...
## $ Ahmadinejad : chr "1,131,111" "623,946" "325,911" "1,799,255" ...
## $ X. : num 56.8 47.5 51.1 68.9 64.6 ...
## $ Rezai : chr "16,920" "12,199" "6,578" "51,788" ...
## $ X..1 : num 0.85 0.93 1.03 1.98 1.69 1.56 1.99 4.61 1.04 1.42 ...
## $ Karrubi : chr "7,246" "21,609" "2,319" "14,579" ...
## $ X..2 : num 0.36 1.64 0.36 0.56 2.42 0.73 0.91 0.84 0.24 0.43 ...
## $ Mousavi : chr "837,858" "656,508" "302,825" "746,697" ...
## $ X..3 : num 42 50 47.5 28.6 31.3 ...
## $ Total.votes : chr "2,010,340" "1,334,356" "642,005" "2,637,482" ...
## $ Invalid.votes : chr "17,205" "20,094" "4,372" "25,163" ...
## $ Valid.votes : chr "1,993,135" "1,314,262" "637,633" "2,612,319" ...
## $ Eligible.voters: chr "2,461,553" "1,883,144" "804,881" "2,987,946" ...
## $ Turnout... : num 81 69.8 79.2 87.4 86.4 ...
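# The str() output shows that the vote-count columns (Ahmadinejad, Rezai, Karrubi, Mousavi, Total.votes, Invalid.votes, Valid.votes, Eligible.voters) were read as chr instead of num, because the values contain thousands separators (e.g. "1,131,111"). Additionally, the percentage columns received uninformative auto-generated names (X., X..1, X..2, X..3).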
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
# Re-import the same CSV with readr. Per the column specification echoed
# below, readr parses the comma-grouped vote counts as numbers and repairs the
# duplicated `%` headers to `%...3`, `%...5`, `%...7`, `%...9`.
TidyIran2009_Election <- read_csv(file = "Election2009Iran.csv")
## New names:
## Rows: 30 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): Region dbl (5): %...3, %...5, %...7, %...9, Turnout, % num (8):
## Ahmadinejad, Rezai, Karrubi, Mousavi, Total votes, Invalid votes, V...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `%` -> `%...3`
## • `%` -> `%...5`
## • `%` -> `%...7`
## • `%` -> `%...9`
# Verify the parsed column types of the readr import.
utils::str(TidyIran2009_Election)
## spc_tbl_ [30 × 14] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Region : chr [1:30] "East Azerbaijan" "West Azerbaijan" "Ardabil" "Isfahan" ...
## $ Ahmadinejad : num [1:30] 1131111 623946 325911 1799255 199654 ...
## $ %...3 : num [1:30] 56.8 47.5 51.1 68.9 64.6 ...
## $ Rezai : num [1:30] 16920 12199 6578 51788 5221 ...
## $ %...5 : num [1:30] 0.85 0.93 1.03 1.98 1.69 1.56 1.99 4.61 1.04 1.42 ...
## $ Karrubi : num [1:30] 7246 21609 2319 14579 7471 ...
## $ %...7 : num [1:30] 0.36 1.64 0.36 0.56 2.42 0.73 0.91 0.84 0.24 0.43 ...
## $ Mousavi : num [1:30] 837858 656508 302825 746697 96826 ...
## $ %...9 : num [1:30] 42 50 47.5 28.6 31.3 ...
## $ Total votes : num [1:30] 2010340 1334356 642005 2637482 312667 ...
## $ Invalid votes : num [1:30] 17205 20094 4372 25163 3495 ...
## $ Valid votes : num [1:30] 1993135 1314262 637633 2612319 309172 ...
## $ Eligible voters: num [1:30] 2461553 1883144 804881 2987946 357687 ...
## $ Turnout, % : num [1:30] 81 69.8 79.2 87.4 86.4 ...
## - attr(*, "spec")=
## .. cols(
## .. Region = col_character(),
## .. Ahmadinejad = col_number(),
## .. `%...3` = col_double(),
## .. Rezai = col_number(),
## .. `%...5` = col_double(),
## .. Karrubi = col_number(),
## .. `%...7` = col_double(),
## .. Mousavi = col_number(),
## .. `%...9` = col_double(),
## .. `Total votes` = col_number(),
## .. `Invalid votes` = col_number(),
## .. `Valid votes` = col_number(),
## .. `Eligible voters` = col_number(),
## .. `Turnout, %` = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
library(benford.analysis)
# Benford analysis of the per-region total vote counts, restricted to the
# leading digit (number.of.digits = 1).
# The `data` expression is written exactly as before because the test output
# prints it as the data name.
benfordIran200 <- benford(
  data = TidyIran2009_Election$`Total votes`,
  number.of.digits = 1
)
library(ggplot2)

# Diagnostic plots for the fitted Benford object.
plot(benfordIran200)

# Pearson chi-squared goodness-of-fit test of the observed digit frequencies.
chisq(benfordIran200)
##
## Pearson's Chi-squared test
##
## data: TidyIran2009_Election$`Total votes`
## X-squared = 7.5941, df = 8, p-value = 0.4741