Skip to contents

Creates a data frame that is a summary table of counts and percentages

Usage

dataVizCleaning(df, scale_labels, pre_post = FALSE, na_remove = TRUE)

Arguments

df

Required, a tibble/data frame of survey items that are categorical/character variables.

scale_labels

Required, a character vector of labels for the response scale, which must be in the desired order. For example, a 5-point scale from minimal to extensive should look like this: levels_min_ext <- c("Minimal", "Slight", "Moderate", "Good", "Extensive").

pre_post

Logical, defaults to FALSE. If TRUE, returns a tibble with an additional column, timing, that is a factor variable of either Pre or Post.

na_remove

Logical, defaults to TRUE. If TRUE, drops NA values; if FALSE, turns NAs into "Missing" and adds it as a factor level in the first position of scale_labels.

Value

A tibble with the data in 5 columns: question, response, n_answers, percent_answers and percent_answers_label. question is the name of the original item; response is all of the categorical responses possible for the item; n_answers is the count of each response; percent_answers is the percentage of each response, stored as a proportion (e.g. 0.222); and percent_answers_label is a character variable of the percentage labelled with a percent sign (e.g. "22%"), for use as a text label. If the pre_post argument is TRUE, an additional column, timing, is added that is a factor variable of either Pre or Post.

Examples

# Fake data for examples: the first data set has single items and the second
# has pre-post data with the correct prefixes in the variable names:
items_single <- tibble::tibble(
    Organization = c("Minimal", "Slight", "Moderate", "Good", "Extensive",
                     "Good", "Moderate", "Slight", "Minimal"),
    Source = c("Slight", "Slight", "Moderate", "Extensive", "Good", "Moderate",
                "Slight", "Minimal", "Slight"),
    Publish = c("Minimal", "Minimal", "Minimal", "Slight", "Slight", "Slight",
                "Moderate", "Moderate", "Moderate"),
    Write = c("Slight", "Slight", "Slight", "Moderate", "Moderate", "Moderate",
                "Good", "Good", "Good"),
    Research = c("Minimal", "Minimal", "Slight", "Slight", "Moderate",
                "Moderate", "Good", "Good", "Good")
)

items_pre_post <- tibble::tibble(
    pre_Organization = c("Minimal", "Slight", "Moderate", "Good",
                    "Extensive", "Good", "Moderate", "Slight", "Minimal"),
    post_Organization = c("Slight", "Moderate", "Good", "Extensive",
                    "Extensive", "Extensive", "Good", "Moderate", "Slight"),
    pre_Source = c("Slight", "Slight", "Moderate", "Extensive", "Good",
                    "Moderate", "Slight", "Minimal", "Slight"),
    post_Source = c("Good", "Good", "Extensive", "Extensive", "Good",
                    "Extensive", "Good", "Moderate", "Good"),
    pre_Publish = c("Minimal", "Minimal", "Minimal", "Slight", "Slight",
                    "Slight", "Moderate", "Moderate", "Moderate"),
    post_Publish = c("Moderate", "Moderate", "Moderate", "Good", "Good",
                    "Good", "Extensive", "Extensive", "Extensive"),
    pre_Write = c("Slight", "Slight", "Slight", "Moderate", "Moderate",
                    "Moderate", "Good", "Good", "Good"),
    post_Write = c("Moderate", "Moderate", "Moderate", "Good", "Good",
                    "Good", "Extensive", "Extensive", "Extensive"),
    pre_Research = c("Minimal", "Minimal", "Slight", "Slight", "Moderate",
                    "Moderate", "Good", "Good", "Good"),
    post_Research = c("Slight", "Slight", "Moderate", "Moderate", "Good",
                    "Good", "Extensive", "Extensive", "Extensive")
)
# Add a row of NA values to each fake data set:
items_pre_post_na <- dplyr::rows_append(items_pre_post,
         tibble::as_tibble_row(purrr::set_names(rep(NA, NCOL(items_pre_post)),
         names(items_pre_post))))
items_single_na <- dplyr::rows_append(items_single,
         tibble::as_tibble_row(purrr::set_names(rep(NA, NCOL(items_single)),
         names(items_single))))

# Likert scale labels to pass to `scale_labels`, in the order used to arrange each variable:
levels_min_ext <- c("Minimal", "Slight", "Moderate", "Good", "Extensive")

dataVizCleaning(df = items_single, pre_post = FALSE,
                scale_labels = levels_min_ext, na_remove = TRUE)
#> # A tibble: 20 × 5
#>    question     response  n_answers percent_answers percent_answers_label
#>    <chr>        <fct>         <int>           <dbl> <chr>                
#>  1 Organization Minimal           2           0.222 22%                  
#>  2 Organization Slight            2           0.222 22%                  
#>  3 Organization Moderate          2           0.222 22%                  
#>  4 Organization Good              2           0.222 22%                  
#>  5 Organization Extensive         1           0.111 11%                  
#>  6 Publish      Minimal           3           0.333 33%                  
#>  7 Publish      Slight            3           0.333 33%                  
#>  8 Publish      Moderate          3           0.333 33%                  
#>  9 Research     Minimal           2           0.222 22%                  
#> 10 Research     Slight            2           0.222 22%                  
#> 11 Research     Moderate          2           0.222 22%                  
#> 12 Research     Good              3           0.333 33%                  
#> 13 Source       Minimal           1           0.111 11%                  
#> 14 Source       Slight            4           0.444 44%                  
#> 15 Source       Moderate          2           0.222 22%                  
#> 16 Source       Good              1           0.111 11%                  
#> 17 Source       Extensive         1           0.111 11%                  
#> 18 Write        Slight            3           0.333 33%                  
#> 19 Write        Moderate          3           0.333 33%                  
#> 20 Write        Good              3           0.333 33%                  
dataVizCleaning(df = items_single_na, pre_post = FALSE,
                scale_labels = levels_min_ext, na_remove = FALSE)
#> # A tibble: 25 × 5
#>    question     response  n_answers percent_answers percent_answers_label
#>    <chr>        <fct>         <int>           <dbl> <chr>                
#>  1 Organization Missing           1             0.1 10%                  
#>  2 Organization Minimal           2             0.2 20%                  
#>  3 Organization Slight            2             0.2 20%                  
#>  4 Organization Moderate          2             0.2 20%                  
#>  5 Organization Good              2             0.2 20%                  
#>  6 Organization Extensive         1             0.1 10%                  
#>  7 Publish      Missing           1             0.1 10%                  
#>  8 Publish      Minimal           3             0.3 30%                  
#>  9 Publish      Slight            3             0.3 30%                  
#> 10 Publish      Moderate          3             0.3 30%                  
#> # ℹ 15 more rows
dataVizCleaning(df = items_pre_post, pre_post = TRUE,
                scale_labels = levels_min_ext, na_remove = TRUE)
#> # A tibble: 37 × 6
#>    question     timing response  n_answers percent_answers percent_answers_label
#>    <chr>        <fct>  <fct>         <int>           <dbl> <chr>                
#>  1 Organization Pre    Minimal           2           0.222 22%                  
#>  2 Organization Pre    Slight            2           0.222 22%                  
#>  3 Organization Pre    Moderate          2           0.222 22%                  
#>  4 Organization Pre    Good              2           0.222 22%                  
#>  5 Organization Pre    Extensive         1           0.111 11%                  
#>  6 Organization Post   Slight            2           0.222 22%                  
#>  7 Organization Post   Moderate          2           0.222 22%                  
#>  8 Organization Post   Good              2           0.222 22%                  
#>  9 Organization Post   Extensive         3           0.333 33%                  
#> 10 Publish      Pre    Minimal           3           0.333 33%                  
#> # ℹ 27 more rows
dataVizCleaning(df = items_pre_post_na, pre_post = TRUE,
                scale_labels = levels_min_ext, na_remove = FALSE)
#> # A tibble: 47 × 6
#>    question     timing response  n_answers percent_answers percent_answers_label
#>    <chr>        <fct>  <fct>         <int>           <dbl> <chr>                
#>  1 Organization Pre    Missing           1             0.1 10%                  
#>  2 Organization Pre    Minimal           2             0.2 20%                  
#>  3 Organization Pre    Slight            2             0.2 20%                  
#>  4 Organization Pre    Moderate          2             0.2 20%                  
#>  5 Organization Pre    Good              2             0.2 20%                  
#>  6 Organization Pre    Extensive         1             0.1 10%                  
#>  7 Organization Post   Missing           1             0.1 10%                  
#>  8 Organization Post   Slight            2             0.2 20%                  
#>  9 Organization Post   Moderate          2             0.2 20%                  
#> 10 Organization Post   Good              2             0.2 20%                  
#> # ℹ 37 more rows