Study #1: Initial Setup Code

11 Mar
Below is the code I used to set up my data. There are undoubtedly more sophisticated ways to go about it, but I’m not a professional programmer and my rudimentary methods seemed to do the trick. Feel free to double-check my work! The raw data file is available for download on Google Drive if you’d like to play around with it on your own or rerun my code in RStudio.
# SCIENTIFIC ENNEAGRAM EXPLORATORY STUDY #1 ####
# This script contains all the code necessary to set up the dataset for the
# neurodivergence, mental health, and birth order tests of Study #1
# (generated and assessed by Danielle, owner of The Scientific Enneagram)

# 1: Set the working directory to the folder with the raw data file ####

# 2: Load the source file into a dataframe (here called "raw_unfiltered") ####
# Empty strings and the literal text "NA" are both read in as missing values.
raw_unfiltered <- read.csv("raw data_final.csv", header = TRUE,
                           na.strings = c("", "NA"))
 ### check that raw_unfiltered contains what you expect (1925 obs, 73 vars)

# 3: Filter out unusable responses ####
## Respondents who didn't report a Type
summary(as.factor(raw_unfiltered$Type)) #shows 38 NA Types
raw_typfilter <- raw_unfiltered[!is.na(raw_unfiltered$Type), ] #removes NAs
 ### check that raw_typfilter reduced observations by 38, from 1925 to 1887

## Respondents who aren't fairly (4) or extremely (5) confident in reported Type
summary(as.factor(raw_typfilter$Confidence)) #shows 69 1s/2s/3s and 58 NA
raw_confilter1 <- raw_typfilter[!is.na(raw_typfilter$Confidence), ] #removes NAs
 ### check that raw_confilter1 reduced observations by 58, from 1887 to 1829
# %in% never returns NA, so this keeps exactly the rows coded 4 or 5.
raw_confilter2 <- raw_confilter1[raw_confilter1$Confidence %in% c(4, 5), ]
 ### check that raw_confilter2 reduced observations by 69, from 1829 to 1760

## Respondents who are under age 18
summary(as.factor(raw_confilter2$Age)) #shows 21 under 18
# A plain `Age != "Under 18"` yields NA for a missing Age, and an NA row index
# produces a row of all NAs. Using %in% drops only the "Under 18" rows and
# leaves respondents with a missing Age intact.
raw_agefilter <- raw_confilter2[!(raw_confilter2$Age %in% "Under 18"), ]
 ### check that raw_agefilter reduced observations by 21, from 1760 to 1739

## Rename the filtered dataframe for ease of reference
raw <- raw_agefilter
 ### check that raw has 1739 observations and 73 variables

# 4: Set up variables in a usable format ####
## Assign categorical variables
raw$Age <- as.factor(raw$Age)
raw$Country <- as.factor(raw$Country)
raw$Type <- as.factor(raw$Type)
raw$Type6version <- as.factor(raw$Type6version)
raw$Confidence <- as.factor(raw$Confidence)

## Assign categorical variables and rename for clarity
# In each call below, `levels` lists the values as they appear in the raw data
# and `labels` gives the shorter display name for the level in the same
# position. Any value not listed in `levels` becomes NA.
raw$Education <- factor(
  raw$Education,
  levels = c("Graduate or professional degree (MA, MS, MBA, PhD, JD, MD, etc.)",
             "Bachelor's degree",
             "Associates or technical degree (vocational or similar)",
             "Some college (university) but no degree",
             "Completed high school (secondary) or GED",
             "Some high school (secondary) or less"),
  labels = c("Graduate/Professional", "Bachelors", "Asso/Tech Degree",
             "Some college", "HS/GED", "Some HS")
)
raw$Arrow <- factor(raw$Arrow,
                    levels = c(2, 0, 1),
                    labels = c("Stress", "Neutral", "Growth"))
raw$Wings <- factor(raw$Wings,
                    levels = c(19, 91, 2, 0),
                    labels = c("Clockwise", "Counterclockwise", "Both",
                               "Neither"))
# The middle label previously read "Agerage"; corrected to "Average".
raw$Ego <- factor(raw$Ego,
                  levels = c(1, 2, 3),
                  labels = c("Unhealthy", "Average", "Healthy"))
raw$NeuroManage <- factor(raw$NeuroManage,
                          levels = c(1, 2, 3),
                          labels = c("Y-meds", "Y-no meds", "N"))
raw$MentalHealthManage <- factor(raw$MentalHealthManage,
                                 levels = c(1, 2, 3),
                                 labels = c("Yes, meds", "Yes, no meds", "No"))
raw$OrdinalBO <- factor(raw$OrdinalBO,
                        levels = c(0, 1, 2, 3),
                        labels = c("Only", "Oldest", "Middle", "Youngest"))

## Reduce country list
# Install forcats only when it is missing, so rerunning the script does not
# download and reinstall the package every time.
if (!requireNamespace("forcats", quietly = TRUE)) {
  install.packages("forcats")
}
library(forcats) #needed for fct_collapse function
summary(raw$Country) #shows counts of respondent countries
# Keep the five named countries as their own (shortened) levels and fold every
# other country into a single "Other" level.
raw$CountryTop5 <- fct_collapse(
  raw$Country,
  USA = "United States of America",
  UK = "United Kingdom of Great Britain and Northern Ireland",
  Canada = "Canada",
  Australia = "Australia",
  SouthAfrica = "South Africa",
  other_level = "Other"
)
 ### check that 'raw' dataframe now has 74 variables

## Assign categorical variables and manually set ordering
# Each factor() call below fixes the level order explicitly; values that are
# not listed in `levels` become NA.
raw$Confidence <- factor(
  raw$Confidence,
  levels = c(4, 5),
  labels = c("Fairly Confident", "Extremely Confident")
)
raw$Type6version <- factor(
  raw$Type6version,
  levels = c(
    "Phobic (avoidant)",
    "Counterphobic (aggressive)",
    "I don't know"
  )
)
raw$CountryTop5 <- factor(
  raw$CountryTop5,
  levels = c("USA", "Canada", "UK", "Australia", "SouthAfrica", "Other")
)
raw$Gender <- factor(
  raw$Gender,
  levels = c(
    "Female",
    "Male",
    "Non-binary / third gender",
    "Prefer to self-describe",
    "Prefer not to say"
  )
)

## Separate Partner Account checklist into unique variables
# Install stringr only when it is missing, so rerunning the script does not
# download and reinstall the package every time.
if (!requireNamespace("stringr", quietly = TRUE)) {
  install.packages("stringr")
}
library(stringr) #needed for str_detect function

# New column name -> the code searched for inside raw$Partners.
partner_codes <- c(enneagramwithabbey = "1",
                   enneagramexplained = "2",
                   enneagrampaths     = "3",
                   mirabellecreations = "4",
                   healthy9club       = "5",
                   theninecoach       = "6")

# One factor column per partner account, added in the order listed above:
# "Follow" when the code is found in Partners, "" when it is not, and NA when
# Partners itself is missing (str_detect returns NA for NA input).
raw[names(partner_codes)] <- lapply(partner_codes, function(code) {
  factor(str_detect(raw$Partners, code),
         levels = c("TRUE", "FALSE"),
         labels = c("Follow", ""))
})
 ### check that the 'raw' dataframe now has 80 variables

## Separate Neurodivergence Diagnosis checklist into unique variables
# Overall flag: any response other than 0 or 999 counts as a diagnosis.
# A missing Neurodivergence value stays NA here.
# NOTE(review): the mental-health overall flag (MHPos) treats a missing value
# as "No diagnosis" instead -- confirm which behaviour is intended.
raw$NeuroPos <- factor(
  !(raw$Neurodivergence == 0 | raw$Neurodivergence == 999),
  levels = c("TRUE", "FALSE"),
  labels = c("Neurodivergent diagnosis", "No diagnosis")
)

# Per-diagnosis flags: one factor column per code found in the response.
# The three vectors passed to Map() line up position by position with the
# column names on the left-hand side.
# NOTE(review): OCDPos uses "No OCD" where the others use "No diagnosis";
# kept as-is.
raw[c("ASDPos", "ADHDPos", "DyslexPos", "DyspraPos",
      "DysgraPos", "DyscalPos", "OCDPos")] <- unname(Map(
  function(code, yes_label, no_label) {
    factor(str_detect(raw$Neurodivergence, code),
           levels = c("TRUE", "FALSE"),
           labels = c(yes_label, no_label))
  },
  c("1", "2", "3", "4", "5", "6", "7"),
  c("ASD diagnosis", "ADHD diagnosis", "Dyslexia diagnosis",
    "Dyspraxia diagnosis", "Dysgraphia diagnosis", "Dyscalculia diagnosis",
    "OCD diagnosis"),
  c("No diagnosis", "No diagnosis", "No diagnosis", "No diagnosis",
    "No diagnosis", "No diagnosis", "No OCD")
))
 ### check that the 'raw' dataframe now has 88 variables

## Separate Mental Health Disorder Diagnoses checklist into unique variables
# Overall flag: a response that is missing, 0, or 999 counts as "No diagnosis";
# anything else counts as "MH diagnosis".
raw$MHPos <- factor(
  !(is.na(raw$MentalHealth) | raw$MentalHealth == 0 |
      raw$MentalHealth == 999),
  levels = c("TRUE", "FALSE"),
  labels = c("MH diagnosis", "No diagnosis")
)

# Per-diagnosis flags: TRUE when the code appears in the response. A missing
# MentalHealth value gives NA here, because str_detect returns NA for NA input.
raw$AnxPos <- factor(
  str_detect(raw$MentalHealth, "1"),
  levels = c("TRUE", "FALSE"),
  labels = c("Anxiety diagnosis", "No diagnosis")
)
raw$DepPos <- factor(
  str_detect(raw$MentalHealth, "2"),
  levels = c("TRUE", "FALSE"),
  labels = c("Depression diagnosis", "No diagnosis")
)
raw$BPDPos <- factor(
  str_detect(raw$MentalHealth, "3"),
  levels = c("TRUE", "FALSE"),
  labels = c("Bipolar diagnosis", "No diagnosis")
)
raw$PTSDPos <- factor(
  str_detect(raw$MentalHealth, "4"),
  levels = c("TRUE", "FALSE"),
  labels = c("PTSD diagnosis", "No diagnosis")
)
 ### check that the 'raw' dataframe now has 93 variables


# 5. Set up ScientificEnneagram Color Palette ####
## Identify @SE colors
# Look up @SE colours by name.
#   ...  Colour names (e.g. "1lt", "5dk", "basedark"), or indices, to select.
# Returns a named character vector of hex codes in the order requested; with
# no arguments, returns the complete colour table.
SciEn_color <- function(...) {
  # Names follow "<type number><lt|dk>" plus three "base" shades.
  # NOTE(review): `2dk` and `3dk` share the same hex code, and the `8lt`/`8dk`
  # pair is identical to `4lt`/`4dk` -- confirm these are intentional.
  hex_by_name <- c(
    `1lt` = "#A0D1CA", `1dk` = "#1D8296",
    `2lt` = "#e5bcd7", `2dk` = "#93272C",
    `3lt` = "#f4623a", `3dk` = "#93272C",
    `4lt` = "#f8ad6d", `4dk` = "#be5500",
    `5lt` = "#98b6e4", `5dk` = "#5e8dda",
    `6lt` = "#fed880", `6dk` = "#e0a526",
    `7lt` = "#ac9f3d", `7dk` = "#8a7b19",
    `8lt` = "#f8ad6d", `8dk` = "#be5500",
    `9lt` = "#f8c1b8", `9dk` = "#f17f70",
    `basedark` = "#005151", `basemed` = "#c39367", `baselight` = "#f2e9db"
  )
  wanted <- c(...)
  if (!is.null(wanted)) {
    return(hex_by_name[wanted])
  }
  hex_by_name
}

## Create palettes using @SE colors
# Return one of the named @SE palettes.
#   palette  Name of the palette to fetch (default "darkType").
#   ...      Accepted for compatibility; not used.
# Returns the named character vector of hex codes for that palette.
# Stops with an error listing the valid names when `palette` is a single
# string that does not match any palette (previously this returned NULL
# silently).
SciEn_palette <- function(palette = "darkType", ...) {
  SciEn_palettes <- list(
    `darkType`=SciEn_color("1dk","2dk","3lt","4lt","5dk","6dk","7dk","8dk","9dk"),
    `lightType`=SciEn_color("1lt","2lt","3dk","4lt","5lt","6lt","7lt","8dk","9lt"),
    `base` = SciEn_color("basedark","basemed","baselight"),
    `water` = SciEn_color("1lt","1dk","5lt","5dk","7lt","7dk","basedark"),
    `fire`= SciEn_color("2lt","2dk","3lt","4lt","9dk","4dk","6lt","9lt","6dk"),
    `stoplight` = SciEn_color("3lt","5lt","7dk","baselight"),
    # "<N>wings" palettes: four colours drawn from type N and its two
    # neighbouring types.
    `1wings` = SciEn_color("2lt","9lt","1dk","1lt"),
    `2wings` = SciEn_color("3lt","1lt","2lt","2dk"),
    `3wings` = SciEn_color("4lt","2lt","3dk","3lt"),
    `4wings` = SciEn_color("5lt","3lt","4lt","4dk"),
    `5wings` = SciEn_color("6lt","4lt","5dk","5lt"),
    `6wings` = SciEn_color("7lt","5lt","6dk","6lt"),
    `7wings` = SciEn_color("8lt","6lt","7dk","7lt"),
    `8wings` = SciEn_color("9lt","7lt","8dk","8lt"),
    `9wings` = SciEn_color("1lt","8lt","9dk","9lt")
  )
  # Only single-string lookups are validated; numeric indices still go
  # straight to `[[` as before.
  if (is.character(palette) && length(palette) == 1L &&
      !palette %in% names(SciEn_palettes)) {
    stop("Unknown SciEn palette \"", palette, "\". Available palettes: ",
         paste(names(SciEn_palettes), collapse = ", "), call. = FALSE)
  }
  SciEn_palettes[[palette]]
}

## Create functions to use @SE palettes in graphs/charts
# Build a palette function for use with ggplot2 discrete scales.
#   palette    Name of the @SE palette to draw from (default "darkType").
#   direction  Non-negative keeps the palette order; negative reverses it.
# Returns a function of `n` that gives the first `n` colours of the palette as
# an unnamed character vector. When `n` is larger than the palette, it warns
# and pads the result with NA rather than returning the warning text as if it
# were a colour.
palette_gen <- function(palette = "darkType", direction = 1) {
  function(n) {
    # as.character() keeps this a character vector even if the lookup is empty.
    all_colors <- as.character(unname(unlist(SciEn_palette(palette))))
    if (direction < 0) {
      all_colors <- rev(all_colors)
    }
    if (n > length(all_colors)) {
      warning("Not enough colors in this palette!", call. = FALSE)
    }
    # seq_len() copes with n == 0; indexing past the end yields NA.
    all_colors[seq_len(n)]
  }
}
# ggplot2 fill scale that uses an @SE palette.
#   palette    Name of the @SE palette (default "darkType").
#   direction  Non-negative keeps the palette order; negative reverses it.
#   ...        Further arguments forwarded to ggplot2::discrete_scale().
# NOTE(review): "SciEn" is passed as the second positional argument
# (scale_name), which newer ggplot2 releases appear to deprecate -- confirm
# against the installed version.
scale_fill_SciEn <- function(palette = "darkType", direction = 1, ...) {
  fill_palette <- palette_gen(palette, direction)
  ggplot2::discrete_scale("fill", "SciEn", fill_palette, ...)
}
Danielle Fuller
Study #1: Initial Setup Code

Study #2: Nerdiness Code

Study #1: Neurodivergence Code