# Load the 2009 Iran election results (one row per region) with base R.
# NOTE: the former `rm(list = ls())` was removed on purpose. It deletes every
# object in the caller's workspace yet still does not give a clean session
# (attached packages and options persist); restart R if a fresh session is
# required.
Iran2009Election <- read.csv("Election2009Iran.csv")

# Preview the first six rows to check how the columns were parsed.
head(Iran2009Election)
##            Region Ahmadinejad    X.  Rezai X..1 Karrubi X..2 Mousavi  X..3
## 1 East Azerbaijan   1,131,111 56.75 16,920 0.85   7,246 0.36 837,858 42.04
## 2 West Azerbaijan     623,946 47.48 12,199 0.93  21,609 1.64 656,508 49.95
## 3         Ardabil     325,911 51.11  6,578 1.03   2,319 0.36 302,825 47.49
## 4         Isfahan   1,799,255 68.88 51,788 1.98  14,579 0.56 746,697 28.58
## 5            Ilam     199,654 64.58  5,221 1.69   7,471 2.42  96,826 31.32
## 6         Bushehr     299,357 61.37  7,608 1.56   3,563 0.73 177,268 36.34
##   Total.votes Invalid.votes Valid.votes Eligible.voters Turnout...
## 1   2,010,340        17,205   1,993,135       2,461,553      80.97
## 2   1,334,356        20,094   1,314,262       1,883,144      69.79
## 3     642,005         4,372     637,633         804,881      79.22
## 4   2,637,482        25,163   2,612,319       2,987,946      87.43
## 5     312,667         3,495     309,172         357,687      86.44
## 6     493,989         6,193     487,796         580,822      83.98
# Inspect the last six rows as well, to confirm the file ends cleanly.
utils::tail(Iran2009Election, n = 6L)
##        Region Ahmadinejad    X.  Rezai X..1 Karrubi X..2 Mousavi  X..3
## 25   Lorestan     677,829 70.91 14,920 1.56  44,036 4.61 219,156 22.93
## 26 Mazandaran   1,289,257 67.70 19,587 1.03  10,050 0.53 585,373 30.74
## 27    Markazi     572,988 73.64 10,057 1.29   4,675 0.60 190,349 24.46
## 28  Hormozgan     482,990 65.50  7,237 0.98   5,126 0.70 241,988 32.82
## 29    Hamadan     765,723 75.86 13,117 1.30  12,032 1.19 218,481 21.65
## 30       Yazd     337,178 55.83  8,406 1.39   2,565 0.42 255,799 42.35
##    Total.votes Invalid.votes Valid.votes Eligible.voters Turnout...
## 25     964,270         8,329     955,941       1,124,940      84.98
## 26   1,919,838        15,571   1,904,267       1,915,240      99.43
## 27     785,961         7,892     778,069         885,557      87.86
## 28     743,024         5,683     737,341         919,908      80.15
## 29   1,019,169         9,816   1,009,353       1,256,250      80.35
## 30     609,856         5,908     603,948         609,341      99.11
# Show the column types that base read.csv() assigned to each variable.
utils::str(object = Iran2009Election)
## 'data.frame':    30 obs. of  14 variables:
##  $ Region         : chr  "East Azerbaijan" "West Azerbaijan" "Ardabil" "Isfahan" ...
##  $ Ahmadinejad    : chr  "1,131,111" "623,946" "325,911" "1,799,255" ...
##  $ X.             : num  56.8 47.5 51.1 68.9 64.6 ...
##  $ Rezai          : chr  "16,920" "12,199" "6,578" "51,788" ...
##  $ X..1           : num  0.85 0.93 1.03 1.98 1.69 1.56 1.99 4.61 1.04 1.42 ...
##  $ Karrubi        : chr  "7,246" "21,609" "2,319" "14,579" ...
##  $ X..2           : num  0.36 1.64 0.36 0.56 2.42 0.73 0.91 0.84 0.24 0.43 ...
##  $ Mousavi        : chr  "837,858" "656,508" "302,825" "746,697" ...
##  $ X..3           : num  42 50 47.5 28.6 31.3 ...
##  $ Total.votes    : chr  "2,010,340" "1,334,356" "642,005" "2,637,482" ...
##  $ Invalid.votes  : chr  "17,205" "20,094" "4,372" "25,163" ...
##  $ Valid.votes    : chr  "1,993,135" "1,314,262" "637,633" "2,612,319" ...
##  $ Eligible.voters: chr  "2,461,553" "1,883,144" "804,881" "2,987,946" ...
##  $ Turnout...     : num  81 69.8 79.2 87.4 86.4 ...
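# The str() output shows that the vote-count columns (Ahmadinejad, Rezai, Karrubi, Mousavi, Total.votes, Invalid.votes, Valid.votes, Eligible.voters) were read as chr instead of num, because the values contain thousands-separator commas.
# Additionally, the percentage columns have uninformative auto-generated names (X., X..1, X..2, X..3), so it is unclear which candidate each one belongs to.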
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)

# Re-read the same file with readr. Unlike base read.csv(), read_csv() guesses
# the comma-grouped vote counts as numbers (col_number) and keeps the original
# headers, de-duplicating the repeated `%` column names by position.
TidyIran2009_Election <- read_csv("Election2009Iran.csv")
## New names:
## Rows: 30 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): Region dbl (5): %...3, %...5, %...7, %...9, Turnout, % num (8):
## Ahmadinejad, Rezai, Karrubi, Mousavi, Total votes, Invalid votes, V...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `%` -> `%...3`
## • `%` -> `%...5`
## • `%` -> `%...7`
## • `%` -> `%...9`
# Confirm that the vote-count columns are now numeric in the readr import.
utils::str(object = TidyIran2009_Election)
## spc_tbl_ [30 × 14] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Region         : chr [1:30] "East Azerbaijan" "West Azerbaijan" "Ardabil" "Isfahan" ...
##  $ Ahmadinejad    : num [1:30] 1131111 623946 325911 1799255 199654 ...
##  $ %...3          : num [1:30] 56.8 47.5 51.1 68.9 64.6 ...
##  $ Rezai          : num [1:30] 16920 12199 6578 51788 5221 ...
##  $ %...5          : num [1:30] 0.85 0.93 1.03 1.98 1.69 1.56 1.99 4.61 1.04 1.42 ...
##  $ Karrubi        : num [1:30] 7246 21609 2319 14579 7471 ...
##  $ %...7          : num [1:30] 0.36 1.64 0.36 0.56 2.42 0.73 0.91 0.84 0.24 0.43 ...
##  $ Mousavi        : num [1:30] 837858 656508 302825 746697 96826 ...
##  $ %...9          : num [1:30] 42 50 47.5 28.6 31.3 ...
##  $ Total votes    : num [1:30] 2010340 1334356 642005 2637482 312667 ...
##  $ Invalid votes  : num [1:30] 17205 20094 4372 25163 3495 ...
##  $ Valid votes    : num [1:30] 1993135 1314262 637633 2612319 309172 ...
##  $ Eligible voters: num [1:30] 2461553 1883144 804881 2987946 357687 ...
##  $ Turnout, %     : num [1:30] 81 69.8 79.2 87.4 86.4 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Region = col_character(),
##   ..   Ahmadinejad = col_number(),
##   ..   `%...3` = col_double(),
##   ..   Rezai = col_number(),
##   ..   `%...5` = col_double(),
##   ..   Karrubi = col_number(),
##   ..   `%...7` = col_double(),
##   ..   Mousavi = col_number(),
##   ..   `%...9` = col_double(),
##   ..   `Total votes` = col_number(),
##   ..   `Invalid votes` = col_number(),
##   ..   `Valid votes` = col_number(),
##   ..   `Eligible voters` = col_number(),
##   ..   `Turnout, %` = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
library(benford.analysis)

# First-digit Benford's Law analysis of the total votes cast in each region.
# `number.of.digits = 1` restricts the comparison to the leading digit only.
benfordIran200 <- benford(
  data = TidyIran2009_Election$`Total votes`,
  number.of.digits = 1
)

library(ggplot2)
# Diagnostic plots for the fitted Benford object.
plot(benfordIran200)

# Pearson chi-squared goodness-of-fit test of the observed leading-digit
# frequencies against the Benford distribution.
# NOTE(review): the data has only 30 rows, so the expected counts for the
# higher leading digits are far below 5 and the chi-squared approximation is
# weak; treat the p-value as indicative rather than conclusive.
chisq(benfordIran200)
## 
##  Pearson's Chi-squared test
## 
## data:  TidyIran2009_Election$`Total votes`
## X-squared = 7.5941, df = 8, p-value = 0.4741