Skip to content

Commit

Permalink
degaussverse 1.0 updates (#1)
Browse files Browse the repository at this point in the history
  • Loading branch information
erikarasnick authored Apr 4, 2022
1 parent 18850df commit 61f3091
Show file tree
Hide file tree
Showing 13 changed files with 1,539 additions and 248 deletions.
4 changes: 1 addition & 3 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,4 @@

# except what we need
!/renv.lock
!/tracts_2010_sf_5072.rds
!/tract_dep_index_2018.rds
!/dep_index.R
!/entrypoint.R
40 changes: 40 additions & 0 deletions .github/workflows/build-deploy-pr.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Pull request workflow: builds the container image, runs it once against the
# example file in ./test, and pushes it to the registry tagged with the name of
# the pull request's head branch.
name: build-deploy-pr
on:
  pull_request:
jobs:
  deploy-images:
    runs-on: ubuntu-latest
    env:
      registry: ghcr.io
      username: degauss-org
      repository: dep_index
    strategy:
      fail-fast: false
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      # Later steps read `container` and `versioned` back through the env context.
      - name: create latest tag variable
        run: |
          container="${{ env.registry }}/${{ env.username }}/${{ env.repository }}:latest"
          echo "container=${container}" >> "$GITHUB_ENV"
      - name: create pull request tag variable based on name of associated branch
        if: github.event_name == 'pull_request'
        run: |
          # Docker tags cannot contain "/", so a branch such as "feature/x" is tagged "feature-x".
          versioned="${{ env.registry }}/${{ env.username }}/${{ env.repository }}:${GITHUB_HEAD_REF//\//-}"
          echo "versioned=${versioned}" >> "$GITHUB_ENV"
      - name: build container
        run: |
          docker build -t ${{ env.container }} .
      # Runs the freshly built image on the example input; a non-zero exit fails the job
      # before anything is pushed.
      - name: test run container
        run: |
          docker run --rm -v "${PWD}/test":/tmp ${{ env.container }} my_address_file_geocoded.csv
      - name: login to ghcr
        uses: docker/login-action@v1
        with:
          registry: ${{ env.registry }}
          username: ${{ env.username }}
          password: ${{ secrets.GITHUB_TOKEN }}
      # Only the branch-named tag is pushed here; `latest` is left to the release workflow.
      - name: deploy pull request container
        run: |
          docker tag ${{ env.container }} ${{ env.versioned }}
          docker push ${{ env.versioned }}
42 changes: 42 additions & 0 deletions .github/workflows/build-deploy-release.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Release workflow: when a release is published, builds the container image,
# runs it once against the example file in ./test, and pushes it to the registry
# under both the release tag and `latest`.
name: build-deploy-release
on:
  release:
    types: [published]
jobs:
  deploy-images:
    runs-on: ubuntu-latest
    env:
      registry: ghcr.io
      username: degauss-org
      repository: dep_index
    strategy:
      fail-fast: false
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      # Later steps read `container` and `versioned` back through the env context.
      - name: create latest tag variable
        run: |
          container="${{ env.registry }}/${{ env.username }}/${{ env.repository }}:latest"
          echo "container=${container}" >> "$GITHUB_ENV"
      - name: create release tag variable
        if: github.event_name == 'release'
        run: |
          # ${GITHUB_REF##*/} keeps only the text after the last "/" of the ref.
          versioned="${{ env.registry }}/${{ env.username }}/${{ env.repository }}:${GITHUB_REF##*/}"
          echo "versioned=${versioned}" >> "$GITHUB_ENV"
      - name: build container
        run: |
          docker build -t ${{ env.container }} .
      # Runs the freshly built image on the example input; a non-zero exit fails the job
      # before anything is pushed.
      - name: test container
        run: |
          docker run --rm -v "${PWD}/test":/tmp ${{ env.container }} my_address_file_geocoded.csv
      - name: login to ghcr
        uses: docker/login-action@v1
        with:
          registry: ${{ env.registry }}
          username: ${{ env.username }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: deploy release (and latest) container
        run: |
          docker tag ${{ env.container }} ${{ env.versioned }}
          docker push ${{ env.versioned }}
          docker push ${{ env.container }}
35 changes: 15 additions & 20 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
FROM rocker/r-ver:4.0.4
FROM rocker/r-ver:4.0.5

# DeGAUSS container metadata
ENV degauss_name="dep_index"
ENV degauss_version="0.1"
ENV degauss_version="0.2.0"
ENV degauss_description="census tract-level deprivation index"

# add OCI labels based on environment variables too
Expand All @@ -11,33 +11,28 @@ LABEL "org.degauss.version"="${degauss_version}"
LABEL "org.degauss.description"="${degauss_description}"
LABEL "org.degauss.argument"="${degauss_argument}"

# ADD https://geomarker.s3.us-east-2.amazonaws.com/geometries/tracts_2010_sf_5072.rds /opt/tracts_2010_sf_5072.rds
# ADD https://geomarker.s3.us-east-2.amazonaws.com/tract_dep_index_2018.rds /opt/tract_dep_index_18.rds
RUN R --quiet -e "install.packages('remotes', repos = c(CRAN = 'https://packagemanager.rstudio.com/all/__linux__/focal/latest'))"

COPY tracts_2010_sf_5072.rds /opt/tracts_2010_sf_5072.rds
COPY tract_dep_index_2018.rds /opt/tract_dep_index_18.rds

# install required version of renv
RUN R --quiet -e "install.packages('remotes', repos = 'https://cran.rstudio.com')"
# make sure version matches what is used in the project: packageVersion('renv')
ENV RENV_VERSION 0.13.2
RUN R --quiet -e "remotes::install_github('rstudio/renv@${RENV_VERSION}')"
RUN R --quiet -e "remotes::install_github('rstudio/[email protected]')"

WORKDIR /app

RUN apt-get update \
&& apt-get install -yqq --no-install-recommends \
libgdal-dev \
libgeos-dev \
libudunits2-dev \
libproj-dev \
&& apt-get clean
&& apt-get install -yqq --no-install-recommends \
libgdal-dev \
libgeos-dev \
libudunits2-dev \
libproj-dev \
&& apt-get clean

COPY renv.lock .

RUN R --quiet -e "renv::restore(repos = c(CRAN = 'https://packagemanager.rstudio.com/all/__linux__/focal/latest'))"

COPY dep_index.R .
ADD https://geomarker.s3.us-east-2.amazonaws.com/geometries/tracts_2010_sf_5072.rds /opt/tracts_2010_sf_5072.rds
ADD https://geomarker.s3.us-east-2.amazonaws.com/tract_dep_index_2018.rds /opt/tract_dep_index_18.rds
COPY entrypoint.R .

WORKDIR /tmp

ENTRYPOINT ["/app/dep_index.R"]
ENTRYPOINT ["/app/entrypoint.R"]
2 changes: 1 addition & 1 deletion LICENSE.md → LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -592,4 +592,4 @@ proprietary programs. If your program is a subroutine library, you may consider
more useful to permit linking proprietary applications with the library. If this is
what you want to do, use the GNU Lesser General Public License instead of this
License. But first, please read
&lt;<http://www.gnu.org/philosophy/why-not-lgpl.html>&gt;.
&lt;<http://www.gnu.org/philosophy/why-not-lgpl.html>&gt;.
34 changes: 5 additions & 29 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,37 +1,13 @@
REGISTRY_HOST=docker.io
USERNAME=degauss
NAME=$(shell basename "$(CURDIR)")
IMAGE=$(REGISTRY_HOST)/$(USERNAME)/$(NAME)

.PHONY: build test shell release clean
.PHONY: build test shell clean

build:
docker build -t $(IMAGE) .
docker build -t dep_index .

test:
docker run --rm -v "${PWD}/test":/tmp $(IMAGE) my_address_file_geocoded.csv
docker run --rm -v "${PWD}/test":/tmp dep_index my_address_file_geocoded.csv

shell:
docker run --rm -it --entrypoint=/bin/bash -v "${PWD}/test":/tmp $(IMAGE)

release:
ifndef VERSION
$(error VERSION is not set. Usage: "make release VERSION=X.X")
endif
ifndef DOCKER_USERNAME
$(error DOCKER_USERNAME is not set)
endif
ifndef DOCKER_PAT
$(error DOCKER_PAT is not set)
endif
git commit -am "Release for image version $(VERSION)" --allow-empty
git tag -a $(VERSION) -m "${VERSION}"
git push origin ${VERSION}
git push
echo "${DOCKER_PAT}" | docker login -u "${DOCKER_USERNAME}" --password-stdin
docker tag ${IMAGE}:latest ${IMAGE}:${VERSION}
docker push ${IMAGE}:${VERSION}
docker push ${IMAGE}:latest
docker run --rm -it --entrypoint=/bin/bash -v "${PWD}/test":/tmp dep_index

clean:
docker system prune -f
docker system prune -f
39 changes: 28 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,25 +1,42 @@
# dep_index <a href='https://degauss-org.github.io/DeGAUSS/'><img src='DeGAUSS_hex.png' align='right' height='138.5' /></a>
# dep_index <a href='https://degauss.org'><img src='https://github.com/degauss-org/degauss_hex_logo/raw/main/PNG/degauss_hex.png' align='right' height='138.5' /></a>

> DeGAUSS container that adds census tract deprivation index to [geocoded](https://degauss.org/geocoder) addresses
[![](https://img.shields.io/github/v/release/degauss-org/dep_index?color=469FC2&label=version&sort=semver)](https://github.com/degauss-org/dep_index/releases)
[![container build status](https://github.com/degauss-org/dep_index/workflows/build-deploy-release/badge.svg)](https://github.com/degauss-org/dep_index/actions/workflows/build-deploy-release.yaml)

[![GitHub Latest Tag](https://img.shields.io/github/v/tag/degauss-org/dep_index)](https://github.com/degauss-org/dep_index/releases)
## Using

## DeGAUSS example call

If `my_address_file_geocoded.csv` is a file in the current working directory with coordinate columns named `lat` and `lon`, then
If `my_address_file_geocoded.csv` is a file in the current working directory with coordinate columns named `lat` and `lon`, then the [DeGAUSS command](https://degauss.org/using_degauss.html#DeGAUSS_Commands):

```sh
docker run --rm -v $PWD:/tmp ghcr.io/degauss-org/dep_index:0.1 my_address_file_geocoded.csv
docker run --rm -v $PWD:/tmp ghcr.io/degauss-org/dep_index:0.2.0 my_address_file_geocoded.csv
```

will produce `my_address_file_geocoded_dep_index_v0.1.csv` with added columns named `fips_tract_id`, `fraction_assisted_income`, `fraction_high_school_edu`, `median_income`, `fraction_no_health_ins`, `fraction_poverty`, `fraction_vacant_housing`, and `dep_index`.
will produce `my_address_file_geocoded_dep_index_0.2.0.csv` with added columns:

- **`fips_tract_id`**: 2010 census tract identifier

- 2018 American Community Survey variables:

## Deprivation Index details
+ **`fraction_assisted_income`**: fraction of households receiving public assistance income or food stamps or SNAP in the past 12 months
+ **`fraction_high_school_edu`**: fraction of population 25 and older with educational attainment of at least high school graduation (includes GED equivalency)
+ **`median_income`**: median household income in the past 12 months in 2018 inflation-adjusted dollars
+ **`fraction_no_health_ins`**: fraction of population with no health insurance coverage
+ **`fraction_poverty`**: fraction of population with income in past 12 months below poverty level
+ **`fraction_vacant_housing`**: fraction of houses that are vacant

- **`dep_index`**: composite measure of the 6 variables above

## Geomarker Methods

This container overlays the input latitude and longitude coordinates with 2010 census tracts, then joins with tract-level deprivation index data derived from the 2018 American Community Survey (ACS).

For more information on the deprivation index, please see the [deprivation index page](https://geomarker.io/dep_index)
For more information on the deprivation index, please see the [deprivation index page](https://geomarker.io/dep_index/).

## Geomarker Data

- 2010 tract shape files are stored at: [`s3://geomarker/geometries/tracts_2010_sf_5072.rds`](https://geomarker.s3.us-east-2.amazonaws.com/geometries/tracts_2010_sf_5072.rds).
- 2018 deprivation index data is stored at: [`s3://geomarker/tract_dep_index_2018.rds`](https://geomarker.s3.us-east-2.amazonaws.com/tract_dep_index_2018.rds) and is also available for download at [https://geomarker.io/dep_index/](https://geomarker.io/dep_index/).

## DeGAUSS details
## DeGAUSS Details

For detailed documentation on DeGAUSS, including general usage and installation, please see the [DeGAUSS homepage](https://degauss.org).
46 changes: 0 additions & 46 deletions dep_index.R

This file was deleted.

44 changes: 44 additions & 0 deletions entrypoint.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/usr/local/bin/Rscript

# Container entrypoint: reads a CSV file with `lat` and `lon` columns, overlays
# the points on 2010 census tracts (EPSG:5072), joins the 2018 tract-level
# deprivation index, and writes the result with dht::write_geomarker_file().

dht::greeting()

## load libraries without messages or warnings
withr::with_message_sink("/dev/null", library(dplyr))
withr::with_message_sink("/dev/null", library(tidyr))
withr::with_message_sink("/dev/null", library(sf))

doc <- "
Usage:
entrypoint.R <filename>
"

opt <- docopt::docopt(doc)

## for interactive testing
## opt <- docopt::docopt(doc, args = 'test/my_address_file_geocoded.csv')
## opt <- docopt::docopt(doc, args = 'my_address_file_geocoded.csv')

message("reading input file...")
# TRUE rather than T: T is an ordinary variable that can be reassigned
d <- dht::read_lat_lon_csv(
  opt$filename,
  nest_df = TRUE,
  sf = TRUE,
  project_to_crs = 5072
)

dht::check_for_column(d$raw_data, "lat", d$raw_data$lat)
dht::check_for_column(d$raw_data, "lon", d$raw_data$lon)

## spatially join points to census tracts
message("reading tract shapefile...")
tracts10 <- readRDS("/opt/tracts_2010_sf_5072.rds")

message("joining to 2010 TIGER/Line+ census tracts using EPSG:5072 projection")
d_tract <- st_join(d$d, tracts10) %>%
  st_drop_geometry()

## attach deprivation index variables by tract identifier
message("reading deprivation index data...")
dep_index18 <- readRDS("/opt/tract_dep_index_18.rds")

message("joining 2018 tract-level deprivation index")
d_tract <- left_join(
  d_tract,
  dep_index18,
  by = c("fips_tract_id" = "census_tract_fips")
)

## merge back on .row after unnesting .rows into .row
dht::write_geomarker_file(
  d = d_tract,
  raw_data = d$raw_data,
  filename = opt$filename
)
Loading

0 comments on commit 61f3091

Please sign in to comment.