-
Notifications
You must be signed in to change notification settings - Fork 26
/
Jenkinsfile
263 lines (239 loc) · 12.8 KB
/
Jenkinsfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
pipeline {
// Run the pipeline inside the pinned Docker build image.
agent {
docker {
reuseNode false
image 'justaddcoffee/ubuntu20-python-3-8-5-dev:4'
}
}
// Scheduled build: day 1 of the month, months 1-12, at a Jenkins-hashed
// minute and hour (the two leading 'H' fields).
triggers{
cron('H H 1 1-12 *')
}
environment {
// Build date formatted as YYYYMMDD; later stages use it to name the dated
// build directory and the per-build stats file.
BUILDSTARTDATE = sh(script: "echo `date +%Y%m%d`", returnStdout: true).trim()
// Sub-directory of the S3 bucket that holds this project's artifacts.
S3PROJECTDIR = 'kg-covid-19' // no trailing slash
// Distribution ID for the AWS CloudFront for this bucket
// used solely for invalidations
AWS_CLOUDFRONT_DISTRIBUTION_ID = 'EUVSWXZQBXCFP'
}
options {
// Timestamp the console output.
timestamps()
}
stages {
// Grace period before any real work starts: a short pause so that an
// unwanted run can be cancelled by hand before it touches the workspace.
stage('Ready and clean') {
    steps {
        // Wait 30 seconds to leave time to abort the build.
        sleep(time: 30, unit: 'SECONDS')
    }
}
// Diagnostic stage: dump the environment, branch, build date, Python version
// and the identity of the user running inside the container.
stage('Initialize') {
    steps {
        dir('./gitrepo') {
            // Record the environment and the branch in files, then print them.
            sh 'env > env.txt'
            sh 'echo $BRANCH_NAME > branch.txt'
            sh 'echo "$BRANCH_NAME"'
            sh 'cat env.txt'
            sh 'cat branch.txt'

            // Record and print the build date (YYYYMMDD).
            sh "echo $BUILDSTARTDATE > dow.txt"
            sh "echo $BUILDSTARTDATE"

            // Toolchain and user identity.
            sh "python3.8 --version"
            sh "id"
            sh "whoami" // this should be jenkinsuser
            // A failing `whoami` means the docker host started the container
            // as a user the image does not know about. That tends to cause
            // many follow-on problems (for example writing to a $HOME
            // directory that does not exist), so failing the build here and
            // having the user fix the setup is intentional.
        }
    }
}
// Check out the project and install it, plus the publishing tools, into a
// virtualenv at ./gitrepo/venv that the later stages activate.
stage('Build kg_covid_19') {
    steps {
        dir('./gitrepo') {
            // Check out the branch that triggered this build.
            git(
                url: 'https://github.com/Knowledge-Graph-Hub/kg-covid-19',
                branch: env.BRANCH_NAME
            )
            sh '/usr/bin/python3.8 -m venv venv'
            // Each `sh` step runs in its own shell, so a standalone
            // `. venv/bin/activate` step would have no effect on later steps.
            // The virtualenv's pip is therefore invoked by explicit path.
            sh './venv/bin/pip install .'
            sh './venv/bin/pip install awscli pystache boto3 s3cmd'
        }
    }
}
// Fetch the raw source data. On success (and only on master) the fresh raw
// data is mirrored to S3; on failure the build is marked UNSTABLE and the
// last uploaded copy of raw/ is pulled back from S3 instead.
stage('Download') {
    steps {
        dir('./gitrepo') {
            script {
                // Capture the exit status instead of letting a failure abort the build.
                def downloadStatus = sh(
                    returnStatus: true,
                    script: '. venv/bin/activate && python3.8 run.py download'
                )

                // NOTE(review): the s3cmd calls below pass
                // --mime-type=plain/text; the conventional type is text/plain.
                // Confirm whether the transposed value is intended.
                if (downloadStatus != 0) {
                    // 'run.py download' failed - fall back to the last good copy of raw/ from s3
                    currentBuild.result = "UNSTABLE"
                    withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_CFG')]) {
                        // Start from an empty data/raw before restoring.
                        sh 'rm -fr data/raw || true;'
                        sh 'mkdir -p data/raw || true'
                        sh '. venv/bin/activate && s3cmd -c $S3CMD_CFG --acl-public --mime-type=plain/text get -r s3://kg-hub-public-data/$S3PROJECTDIR/raw/ data/raw/'
                    }
                } else if (env.BRANCH_NAME == 'master') {
                    // Download succeeded on the publishing branch: upload raw/ to s3.
                    withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_CFG')]) {
                        sh '. venv/bin/activate && s3cmd -c $S3CMD_CFG --acl-public --mime-type=plain/text --cf-invalidate put -r data/raw s3://kg-hub-public-data/$S3PROJECTDIR/'
                    }
                } else {
                    echo "Will not push if not on correct branch."
                }
            }
        }
    }
}
// Run the project's transform step inside the virtualenv; the environment is
// printed first as a debugging aid.
stage('Transform') {
    steps {
        dir('./gitrepo') {
            sh(script: '. venv/bin/activate && env && python3.8 run.py transform')
        }
    }
}
// Run the merge step using merge_jenkins.yaml, then add a date-stamped copy
// of the graph statistics to the merged tar archive.
stage('Merge') {
    steps {
        dir('./gitrepo') {
            sh(script: '. venv/bin/activate && python3.8 run.py merge -y merge_jenkins.yaml')
            // Keep a per-build copy of the stats under a dated file name.
            sh(script: 'cp merged_graph_stats.yaml merged_graph_stats_$BUILDSTARTDATE.yaml')
            // Append (-r) the dated stats file to the merged archive.
            // NOTE(review): this targets merged-kg.tar while the Publish stage
            // copies merged-kg.tar.gz - confirm where the archive is compressed.
            sh(script: 'tar -rvf data/merged/merged-kg.tar merged_graph_stats_$BUILDSTARTDATE.yaml')
        }
    }
}
// Build blazegraph-runner from source and use it to load the merged
// N-Triples file into a Blazegraph journal, which is then compressed.
stage('Make blazegraph journal') {
    steps {
        dir('./gitrepo/blazegraph') {
            git(
                branch: 'master',
                url: 'https://github.com/balhoff/blazegraph-runner.git'
            )
            // HOME is pointed at the current directory to prevent sbt from
            // trying to make dir .cache in /
            sh 'HOME=`pwd` && sbt stage'

            // Confirm the merged graph exists, then decompress it for loading.
            sh 'ls -lhd ../data/merged/merged-kg.nt.gz'
            sh 'pigz -f -d ../data/merged/merged-kg.nt.gz'

            // Load the N-Triples into ../merged-kg.jnl (JVM max heap set to 128G).
            sh 'export JAVA_OPTS=-Xmx128G && ./target/universal/stage/bin/blazegraph-runner load --informat=ntriples --journal=../merged-kg.jnl --use-ontology-graph=true ../data/merged/merged-kg.nt'

            // Compress the journal and re-compress the N-Triples file.
            sh 'pigz -f ../merged-kg.jnl'
            sh 'pigz -f ../data/merged/merged-kg.nt'
        }
    }
}
// Assemble the build artifacts into $S3PROJECTDIR/$BUILDSTARTDATE and
// $S3PROJECTDIR/current, generate index pages, upload everything to the S3
// bucket and invalidate the CloudFront cache. The upload only happens on
// master; the "do not clobber an existing build" check runs on every branch.
stage('Publish') {
    steps {
        dir('./gitrepo') {
            script {
                // code for building s3 index files
                sh 'git clone https://github.com/justaddcoffee/go-site.git'

                // Make sure we aren't going to clobber existing data: the
                // remote directory for today's build date must be empty.
                withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_CFG')]) {
                    // `def` keeps the listing local instead of leaking it
                    // into the script binding.
                    def remoteBuildDirContents = sh(
                        script: '. venv/bin/activate && s3cmd -c $S3CMD_CFG ls s3://kg-hub-public-data/$S3PROJECTDIR/$BUILDSTARTDATE/',
                        returnStdout: true
                    ).trim()
                    echo "REMOTE_BUILD_DIR_CONTENTS (THIS SHOULD BE EMPTY): '${remoteBuildDirContents}'"
                    if (remoteBuildDirContents != '') {
                        // Abort the build with an explicit message.
                        error("Will not overwrite existing remote S3 directory: $S3PROJECTDIR/$BUILDSTARTDATE")
                    }
                    echo "remote directory $S3PROJECTDIR/$BUILDSTARTDATE is empty, proceeding"
                }

                if (env.BRANCH_NAME != 'master') {
                    echo "Will not push if not on correct branch."
                } else {
                    withCredentials([
                        file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_CFG'),
                        file(credentialsId: 'aws_kg_hub_push_json', variable: 'AWS_JSON'),
                        string(credentialsId: 'aws_kg_hub_access_key', variable: 'AWS_ACCESS_KEY_ID'),
                        string(credentialsId: 'aws_kg_hub_secret_key', variable: 'AWS_SECRET_ACCESS_KEY')]) {
                        //
                        // make $BUILDSTARTDATE/ directory and sync to s3 bucket
                        //
                        sh 'mkdir $BUILDSTARTDATE/'
                        sh 'cp -p data/merged/merged-kg.nt.gz $BUILDSTARTDATE/kg-covid-19.nt.gz'
                        sh 'cp -p data/merged/merged-kg.tar.gz $BUILDSTARTDATE/kg-covid-19.tar.gz'
                        sh 'cp -p merged-kg.jnl.gz $BUILDSTARTDATE/kg-covid-19.jnl.gz'
                        // transformed data
                        sh 'rm -fr data/transformed/.gitkeep'
                        sh 'cp -pr data/transformed $BUILDSTARTDATE/'
                        sh 'cp -pr data/raw $BUILDSTARTDATE/'
                        sh 'cp Jenkinsfile $BUILDSTARTDATE/'
                        // stats dir
                        sh 'mkdir $BUILDSTARTDATE/stats/'
                        sh 'cp -p *_stats.yaml $BUILDSTARTDATE/stats/'
                        sh 'cp templates/README.build $BUILDSTARTDATE/README'
                        // make local $S3PROJECTDIR
                        sh 'mkdir $S3PROJECTDIR'
                        sh 'cp templates/README.toplevel $S3PROJECTDIR/README'
                        // Add a local dir for each existing remote build so
                        // they are indexed: list the project subdir on s3,
                        // keep entries ending in '/', drop raw/ and current/.
                        // The project dir comes from $S3PROJECTDIR rather than
                        // a hard-coded name, matching the rest of this stage.
                        // NOTE(review): unlike the other s3cmd calls, this one
                        // and the index.html loop below do not pass
                        // -c $S3CMD_CFG; presumably they rely on the AWS_*
                        // variables bound above - confirm.
                        sh ". venv/bin/activate && for dir in `s3cmd ls s3://kg-hub-public-data/$S3PROJECTDIR/ | grep '\\/\$' | awk '{print \$NF}' | grep -w -v -E 'raw|current' | xargs -n1 basename`; do mkdir -p $S3PROJECTDIR/\$dir; done"
                        // now make two dirs, $BUILDSTARTDATE and current/, both with the same contents
                        sh 'mv $BUILDSTARTDATE $S3PROJECTDIR/'
                        sh 'cp -pr $S3PROJECTDIR/$BUILDSTARTDATE $S3PROJECTDIR/current'
                        //
                        // put $S3PROJECTDIR/$BUILDSTARTDATE/ and $S3PROJECTDIR/current in s3 bucket
                        //
                        sh '. venv/bin/activate && multi_indexer -v --directory $S3PROJECTDIR --prefix https://kg-hub.berkeleybop.io/$S3PROJECTDIR/ -x -u'
                        // for existing builds on s3, we just made an index.html that will clobber the existing (correct) s3 index.html
                        // here we download the existing index.html and clobber the local one instead
                        sh ". venv/bin/activate && for dir in `s3cmd ls s3://kg-hub-public-data/$S3PROJECTDIR/ | grep '\\/\$' | awk '{print \$NF}' | grep -w -v -E 'raw|current' | xargs -n1 basename`; do s3cmd get --force --continue s3://kg-hub-public-data/$S3PROJECTDIR/\$dir/index.html $S3PROJECTDIR/\$dir/ || true; done"
                        sh '. venv/bin/activate && s3cmd -c $S3CMD_CFG put -pr --acl-public --cf-invalidate $S3PROJECTDIR s3://kg-hub-public-data/'
                        // Invalidate the CDN now that the new files are up.
                        sh 'echo "[preview]" > ./awscli_config.txt && echo "cloudfront=true" >> ./awscli_config.txt'
                        sh '. venv/bin/activate && AWS_CONFIG_FILE=./awscli_config.txt python3.8 ./venv/bin/aws cloudfront create-invalidation --distribution-id $AWS_CLOUDFRONT_DISTRIBUTION_ID --paths "/*"'
                        // Should now appear at:
                        // https://kg-hub.berkeleybop.io/[artifact name]
                    }
                }
            }
        }
    }
}
// Master only: check out the operations repository and run its Ansible
// playbook to update the public Blazegraph endpoint.
stage('Deploy blazegraph') {
    when { branch 'master' }
    steps {
        git(
            url: 'https://github.com/geneontology/operations.git',
            branch: 'master',
            credentialsId: 'justaddcoffee_github_api_token_username_pw'
        )
        dir('./ansible') {
            withCredentials([file(credentialsId: 'ansible-bbop-local-slave', variable: 'DEPLOY_LOCAL_IDENTITY')]) {
                echo 'Push master out to public Blazegraph'
                // Pre-register the target host's public key so that ansible's
                // ssh command doesn't fail (in a very difficult-to-debug way)
                // waiting for someone to accept it.
                // NOTE(review): the original comment names pan.lbl.gov, but
                // the key is scanned from 3.221.221.206 - confirm the target.
                sh 'mkdir -p ~/.ssh/'
                sh 'ssh-keyscan -H 3.221.221.206 >> ~/.ssh/known_hosts'
                // Try the playbook up to three times before failing.
                retry(3) {
                    sh 'HOME=`pwd` && ansible-playbook update-kg-hub-endpoint.yaml --inventory=hosts.remote-rdf-endpoint --private-key="$DEPLOY_LOCAL_IDENTITY" -e target_user=ubuntu --extra-vars="endpoint=internal"'
                }
            }
        }
    }
}
}
// Post-build actions: always wipe the workspace, and log a short message for
// each build outcome.
post {
    always {
        echo('In always')
        echo('Cleaning workspace...')
        // Remove the workspace contents regardless of build result.
        cleanWs()
    }
    success {
        echo('I succeeded!')
    }
    unstable {
        echo('I am unstable :/')
    }
    failure {
        echo('I failed :(')
    }
    changed {
        // Runs when this build's result differs from the previous build's.
        echo('Things were different before...')
    }
}
}