library(tidyverse)
## ─ Attaching packages ──────────────────── tidyverse 1.3.1 ─
## ✓ tibble  3.1.4     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.0.2     ✓ forcats 0.5.1
## ✓ purrr   0.3.4
## ─ Conflicts ───────────────────── tidyverse_conflicts() ─
## x dplyr::between()   masks data.table::between()
## x dplyr::filter()    masks stats::filter()
## x dplyr::first()     masks data.table::first()
## x dplyr::lag()       masks stats::lag()
## x dplyr::last()      masks data.table::last()
## x purrr::transpose() masks data.table::transpose()
library(tidymodels)
## Registered S3 method overwritten by 'tune':
##   method                   from   
##   required_pkgs.model_spec parsnip
## ─ Attaching packages ─────────────────── tidymodels 0.1.3 ─
## ✓ broom        0.7.9      ✓ rsample      0.1.0 
## ✓ dials        0.0.10     ✓ tune         0.1.6 
## ✓ infer        1.0.0      ✓ workflows    0.2.3 
## ✓ modeldata    0.1.1      ✓ workflowsets 0.1.0 
## ✓ parsnip      0.1.7      ✓ yardstick    0.0.8 
## ✓ recipes      0.1.17
## ─ Conflicts ───────────────────── tidymodels_conflicts() ─
## x dplyr::between()   masks data.table::between()
## x scales::discard()  masks purrr::discard()
## x dplyr::filter()    masks stats::filter()
## x dplyr::first()     masks data.table::first()
## x recipes::fixed()   masks stringr::fixed()
## x dplyr::lag()       masks stats::lag()
## x dplyr::last()      masks data.table::last()
## x yardstick::spec()  masks readr::spec()
## x recipes::step()    masks stats::step()
## x purrr::transpose() masks data.table::transpose()
## • Use tidymodels_prefer() to resolve common conflicts.
library(skimr)
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library(tidytext)
# Load the training set; the relative path is resolved against the current
# working directory.
train_raw <- read_csv(file = "train.csv")
## Warning: One or more parsing issues, see `problems()` for details
## Rows: 10000 Columns: 16
## ─ Column specification ────────────────────────────
## Delimiter: ","
## chr  (4): city, description, homeType, priceRange
## dbl (11): uid, latitude, longitude, garageSpaces, yearBuilt, numOfPatioAndPo...
## lgl  (1): hasSpa
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Tally how many rows fall into each priceRange label.
count(train_raw, priceRange)
## # A tibble: 5 × 2
##   priceRange        n
##   <chr>         <int>
## 1 0-250000       1249
## 2 250000-350000  2356
## 3 350000-450000  2301
## 4 450000-650000  2275
## 5 650000+        1819
# Hex-binned map of price over longitude/latitude.
# priceRange is a text label (e.g. "250000-350000"); parse_number() keeps the
# first number it finds, so each row is represented by the leading figure of
# its label. stat_summary_hex() then summarises that value within each hexagon
# (its default summary is the mean, hence the legend title).
price_plot <-
  ggplot(
    mutate(train_raw, priceRange = parse_number(priceRange)),
    aes(x = longitude, y = latitude, z = priceRange)
  ) +
  stat_summary_hex(bins = 50, alpha = 0.8) +
  scale_fill_viridis_c() +
  labs(title = "Price", fill = "mean")

# Auto-print the plot at top level.
price_plot