-
Notifications
You must be signed in to change notification settings - Fork 5
/
Jenkinsfile
1554 lines (1398 loc) · 78.9 KB
/
Jenkinsfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
pipeline {
agent any
// In addition to manual runs, trigger somewhere at midnight to
// give us the max time in a day to get things right.
triggers {
// Master never runs--Feb 31st.
// (The schedule asks for day-of-month 31 in month 2, a date that
// never occurs, so this branch is only ever started by hand.)
cron('0 0 31 2 *')
// Nightly @12am, for "snapshot", skip "release" night.
//cron('0 0 2-31/2 * *')
// First of the month @12am, for "release" (also "current").
//cron('0 0 1 * *')
}
// Run-wide settings for this branch; every entry below is exported as
// an environment variable to the stages of this pipeline.
environment {
///
/// Internal run variables.
///
// The branch of geneontology/go-site to use.
TARGET_GO_SITE_BRANCH = 'master'
// The branch of geneontology/go-stats to use.
TARGET_GO_STATS_BRANCH = 'master'
// The branch of go-ontology to use.
TARGET_GO_ONTOLOGY_BRANCH = 'master'
// The branch of minerva to use.
TARGET_MINERVA_BRANCH = 'master'
// The branch of ROBOT to use in one silly section.
// Necessary due to java version jump.
// https://github.com/ontodev/robot/issues/997
TARGET_ROBOT_BRANCH = 'master'
// The branch of noctua-models to use.
TARGET_NOCTUA_MODELS_BRANCH = 'master'
// The people to call when things go bad. It is a comma-space
// "separated" string.
// NOTE(review): every address below reads "[email protected]", which
// looks like a web-scrape e-mail-obfuscation placeholder rather than
// a deliverable address; the value is also comma-separated without
// spaces, unlike the description above -- confirm both.
TARGET_RELEASE_HOLD_EMAILS = '[email protected],[email protected],[email protected],[email protected],[email protected]'
// The file bucket(/folder) combination to use.
TARGET_BUCKET = 'go-data-product-experimental'
// The URL prefix to use when creating site indices.
TARGET_INDEXER_PREFIX = 'http://experimental.geneontology.io'
// This variable should typically be 'TRUE', which will cause
// some additional basic checks to be made. There are some
// very exotic cases where these checks may need to be skipped
// for a run, in that case this variable is set to 'FALSE'.
WE_ARE_BEING_SAFE_P = 'TRUE'
// Sanity check for solr index being built--overall min count.
// See https://github.com/geneontology/pipeline/issues/315 .
// Only used on release attempts (as it saves QC time and
// getting the number for all branches would be a trick).
SANITY_SOLR_DOC_COUNT_MIN = 11000000
SANITY_SOLR_BIOENTITY_DOC_COUNT_MIN = 1400000
// Control make to get through our loads faster if
// possible. Assuming we're cpu bound for some of these...
// wok has 48 "processors" over 12 "cores", so I have no idea;
// let's go with conservative and see if we get an
// improvement.
MAKECMD = 'make --jobs 3 --max-load 10.0'
//MAKECMD = 'make'
///
/// PANTHER/PAINT metadata.
///
PANTHER_VERSION = '19.0'
///
/// Application tokens.
///
// The Zenodo concept ID to use for releases (and occasionally
// master testing).
ZENODO_ARCHIVE_CONCEPT = '1170314'
// Distribution ID for the AWS CloudFront for this branch,
// used solely for invalidations. Versioned release does not
// need this as it is always a new location and the index
// upload already has an invalidation on it. For current,
// snapshot, and experimental.
AWS_CLOUDFRONT_DISTRIBUTION_ID = 'E2CDVG5YT5R4K4'
AWS_CLOUDFRONT_RELEASE_DISTRIBUTION_ID = 'E2HF1DWYYDLTQP'
///
/// Ontobio Validation
///
VALIDATION_ONTOLOGY_URL="http://skyhook.berkeleybop.org/master/ontology/go.json"
///
/// Minerva input.
///
// Minerva operating profile.
// The list is flattened into one space-separated string, since an
// environment variable can only hold a single string value.
MINERVA_INPUT_ONTOLOGIES = [
"http://skyhook.berkeleybop.org/master/ontology/extensions/go-lego.owl"
].join(" ")
///
/// GOlr/AmiGO input.
///
// GOlr load profile.
GOLR_SOLR_MEMORY = "128G"
GOLR_LOADER_MEMORY = "192G"
// As above, each list is flattened into one space-separated string.
GOLR_INPUT_ONTOLOGIES = [
"http://skyhook.berkeleybop.org/master/ontology/extensions/go-amigo.owl"
].join(" ")
GOLR_INPUT_GAFS = [
//"http://skyhook.berkeleybop.org/master/products/upstream_and_raw_data/paint_other.gaf.gz",
"http://skyhook.berkeleybop.org/master/annotations/goa_chicken.gaf.gz",
"http://skyhook.berkeleybop.org/master/annotations/goa_chicken_complex.gaf.gz",
"http://skyhook.berkeleybop.org/master/annotations/goa_uniprot_all_noiea.gaf.gz",
"http://skyhook.berkeleybop.org/master/annotations/mgi.gaf.gz",
"http://skyhook.berkeleybop.org/master/annotations/pombase.gaf.gz",
"http://skyhook.berkeleybop.org/master/annotations/wb.gaf.gz"
].join(" ")
GOLR_INPUT_PANTHER_TREES = [
"http://skyhook.berkeleybop.org/master/products/panther/arbre.tgz"
].join(" ")
///
/// Groups to run and tests to avoid running during the current
/// mega-make.
///
// The gorule tag is used to identify which rules to suppress
// reports from during the megastep and during templating the
// reports after the megastep. The tags are currently
// respected at two times in the pipeline: the gorules report
// takes the flag as a CLI argument, suppressing it; ontobio
// takes it during the same stage as the JSON
// generation/parsing step, to suppress the .md output. At this
// time, this variable can be either nothing or empty string
// for no rule suppression (default behavior everything), or a
// single value (practically speaking pretty much always
// "silent")
//GORULE_TAGS_TO_SUPPRESS="silent"
// Optional. Groups to run.
RESOURCE_GROUPS="ecocyc goa mgi paint pseudocap wb"
// Optional. Datasets to skip within the resources that we
// will run (defined in the line above).
// NOTE(review): "goa_human" appears twice and, unlike the goa_cow and
// goa_dog runs, there is no "goa_human_isoform" entry -- possibly a
// typo for it; confirm before changing.
DATASET_EXCLUDES="goa_uniprot_gcrp goa_pdb goa_chicken_isoform goa_chicken_rna goa_cow goa_cow_complex goa_cow_isoform goa_cow_rna goa_dog goa_dog_complex goa_dog_isoform goa_dog_rna goa_human goa_human goa_human_complex goa_human_rna paint_cgd paint_dictybase paint_fb paint_goa_chicken paint_goa_human paint_other paint_rgd paint_sgd paint_tair paint_zfin"
// Optional. This acts as an override, /if/ it's grabbed (as
// defined above).
GOA_UNIPROT_ALL_URL="http://skyhook.berkeleybop.org/goa_uniprot_short.gaf.gz"
}
// Pipeline-wide options: timestamp the console output and cap the
// number of retained builds at 14.
options{
timestamps()
buildDiscarder(logRotator(numToKeepStr: '14'))
}
stages {
// Very first: pause for a minute to give a chance to
// cancel and clean the workspace before use.
stage('Ready and clean') {
steps {
// Check to make sure we have coherent metadata so we
// don't clobber good products.
// (watchdog() is defined outside this section of the file.)
watchdog();
// Give us a minute to cancel if we want.
sleep time: 1, unit: 'MINUTES'
// Wipe the workspace, directories included, with deferred wipeout
// switched off.
cleanWs deleteDirs: true, disableDeferredWipeout: true
}
}
// Pin the run's start date and day, reset the base via initialize(),
// and record run metadata (environment, branch, day, date) to files in
// the workspace, echoing each into the build log.
stage('Initialize') {
steps {
///
/// Automatic run variables.
///
// Pin dates and day to beginning of run.
script {
// Date in YYYY-MM-DD form, as printed by `date` on the agent.
env.START_DATE = sh (
script: 'date +%Y-%m-%d',
returnStdout: true
).trim()
// Full weekday name, as printed by `date +%A` on the agent.
env.START_DAY = sh (
script: 'date +%A',
returnStdout: true
).trim()
}
// Reset base.
// (initialize() is defined outside this section of the file.)
initialize();
// Record the run metadata locally and echo it for later debugging.
// NOTE(review): the day is written here as START_DAY / dow.txt,
// while later stages read metadata/dow.txt back from skyhook as
// START_DOW; nothing in this stage uploads these files -- confirm
// where that happens.
sh 'env > env.txt'
sh 'echo $BRANCH_NAME > branch.txt'
sh 'echo "$BRANCH_NAME"'
sh 'cat env.txt'
sh 'cat branch.txt'
sh 'echo $START_DAY > dow.txt'
sh 'echo "$START_DAY"'
sh 'echo $START_DATE > date.txt'
sh 'echo "$START_DATE"'
}
}
// Build or fetch the production tools (owltools, minerva, robot,
// arachne, blazegraph-runner, gaferencer) in parallel and rsync what
// each produces into the branch's bin/ (and, for some, lib/) area on
// skyhook, using the 'skyhook-private-key' credential for ssh.
// NOTE(review): the rsync targets below read "[email protected]:...",
// which looks like a web-scrape e-mail-obfuscation placeholder rather
// than a real user@host -- confirm against the canonical file.
stage('Ready production software') {
    steps {
        parallel(
            "Ready owltools": {
                // Legacy: build 'owltools-build'
                dir('./owltools') {
                    // Remember that git lays out into CWD.
                    // No branch is named, so the git step's default applies.
                    git 'https://github.com/owlcollab/owltools.git'
                    sh 'mvn -f OWLTools-Parent/pom.xml -U clean install -DskipTests -Dmaven.javadoc.skip=true -Dsource.skip=true'
                    // Attempt to rsync produced into bin/.
                    withCredentials([file(credentialsId: 'skyhook-private-key', variable: 'SKYHOOK_IDENTITY')]) {
                        sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" OWLTools-Runner/target/owltools [email protected]:/home/skyhook/$BRANCH_NAME/bin/'
                        sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" OWLTools-Oort/bin/* [email protected]:/home/skyhook/$BRANCH_NAME/bin/'
                        sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" OWLTools-NCBI/bin/* [email protected]:/home/skyhook/$BRANCH_NAME/bin/'
                        sh 'rsync -vhac -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" --exclude ".git" OWLTools-Oort/reporting/* [email protected]:/home/skyhook/$BRANCH_NAME/bin/'
                        sh 'rsync -vhac -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" --exclude ".git" OWLTools-Runner/contrib/* [email protected]:/home/skyhook/$BRANCH_NAME/bin/'
                    }
                }
            },
            "Ready minerva": {
                dir('./minerva') {
                    // Remember that git lays out into CWD.
                    git branch: TARGET_MINERVA_BRANCH, url: 'https://github.com/geneontology/minerva.git'
                    sh './build-cli.sh'
                    // Attempt to rsync produced into bin/.
                    withCredentials([file(credentialsId: 'skyhook-private-key', variable: 'SKYHOOK_IDENTITY')]) {
                        sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" minerva-cli/bin/minerva-cli.* [email protected]:/home/skyhook/$BRANCH_NAME/bin/'
                    }
                }
            },
            "Ready robot": {
                // Legacy: build 'robot-build'
                dir('./robot') {
                    // Remember that git lays out into CWD.
                    git branch: TARGET_ROBOT_BRANCH, url:'https://github.com/kltm/robot-old.git'
                    // Update the POMs by replacing "SNAPSHOT"
                    // with the current Git hash. First make
                    // sure maven-help-plugin is installed
                    sh 'mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=project.version'
                    // Now get and set the version. VERSION and
                    // BUILD are plain shell variables, so they are
                    // computed in the same `sh` step that uses
                    // them: every `sh` step starts its own shell,
                    // and a value assigned in one step is gone by
                    // the next (which previously left the new
                    // version as just "+").
                    sh '''
                        VERSION=`mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=project.version | grep -v '\\[' | sed 's/-SNAPSHOT//'`
                        BUILD=`git rev-parse --short HEAD`
                        mvn versions:set -DnewVersion="$VERSION+$BUILD"
                    '''
                    sh 'mvn -U clean install -DskipTests'
                    // Attempt to rsync produced into bin/.
                    withCredentials([file(credentialsId: 'skyhook-private-key', variable: 'SKYHOOK_IDENTITY')]) {
                        sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" bin/* [email protected]:/home/skyhook/$BRANCH_NAME/bin/'
                    }
                }
            },
            "Ready arachne": {
                dir('./arachne') {
                    // Fetch and unpack the pinned release tarball.
                    sh 'wget -N https://github.com/balhoff/arachne/releases/download/v1.0.2/arachne-1.0.2.tgz'
                    sh 'tar -xvf arachne-1.0.2.tgz'
                    // Attempt to rsync produced into bin/.
                    withCredentials([file(credentialsId: 'skyhook-private-key', variable: 'SKYHOOK_IDENTITY')]) {
                        sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" arachne-1.0.2/bin/* [email protected]:/home/skyhook/$BRANCH_NAME/bin/'
                        // WARNING/BUG: needed for arachne to
                        // run at this point.
                        sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" arachne-1.0.2/lib/* [email protected]:/home/skyhook/$BRANCH_NAME/lib/'
                    }
                }
            },
            "Ready blazegraph-runner": {
                dir('./blazegraph-runner') {
                    // Fetch and unpack the pinned release tarball.
                    sh 'wget -N https://github.com/balhoff/blazegraph-runner/releases/download/v1.4/blazegraph-runner-1.4.tgz'
                    sh 'tar -xvf blazegraph-runner-1.4.tgz'
                    withCredentials([file(credentialsId: 'skyhook-private-key', variable: 'SKYHOOK_IDENTITY')]) {
                        // Attempt to rsync bin into bin/.
                        sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" blazegraph-runner-1.4/bin/* [email protected]:/home/skyhook/$BRANCH_NAME/bin/'
                        // Attempt to rsync libs into lib/.
                        sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" blazegraph-runner-1.4/lib/* [email protected]:/home/skyhook/$BRANCH_NAME/lib/'
                    }
                }
            },
            "Ready Gaferencer": {
                dir('./gaferencer') {
                    // Fetch and unpack the pinned release tarball.
                    sh 'wget -N https://github.com/geneontology/gaferencer/releases/download/v0.5/gaferencer-0.5.tgz'
                    sh 'tar -xvf gaferencer-0.5.tgz'
                    withCredentials([file(credentialsId: 'skyhook-private-key', variable: 'SKYHOOK_IDENTITY')]) {
                        // Attempt to rsync bin into bin/.
                        sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" gaferencer-0.5/bin/* [email protected]:/home/skyhook/$BRANCH_NAME/bin/'
                        // Attempt to rsync libs into lib/.
                        sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" gaferencer-0.5/lib/* [email protected]:/home/skyhook/$BRANCH_NAME/lib/'
                    }
                }
            }
        )
    }
}
// Fetch the source GAFs described by the go-site dataset metadata,
// then push everything that was downloaded to the branch's
// products/upstream_and_raw_data/ area on skyhook.
stage("Download annotation data") {
    steps {
        dir("./go-site") {
            git branch: TARGET_GO_SITE_BRANCH, url: 'https://github.com/geneontology/go-site.git'
            script {
                // Each optional setting turns into a run of CLI flags,
                // or stays an empty string when the variable is unset
                // or empty: "-x <dataset>" per excluded dataset,
                // "-g <group>" per resource group to run, and a single
                // "-m" mapping that overrides the goa_uniprot_all
                // source URL.
                def exclude_flags = (env.DATASET_EXCLUDES
                    ? DATASET_EXCLUDES.split(" ").collect { "-x ${it}" }.join(" ")
                    : "")
                def group_flags = (env.RESOURCE_GROUPS
                    ? RESOURCE_GROUPS.split(" ").collect { "-g ${it}" }.join(" ")
                    : "")
                def mapping_flag = (env.GOA_UNIPROT_ALL_URL
                    ? "-m goa_uniprot_all gaf ${GOA_UNIPROT_ALL_URL}"
                    : "")
                sh "python3 ./scripts/download_source_gafs.py all --datasets ./metadata/datasets --target ./target/ --type gaf ${exclude_flags} ${group_flags} ${mapping_flag}"
            }
            withCredentials([file(credentialsId: 'skyhook-private-key', variable: 'SKYHOOK_IDENTITY')]) {
                // Ship the downloads to where later stages expect them.
                sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" ./target/* [email protected]:/home/skyhook/$BRANCH_NAME/products/upstream_and_raw_data/'
            }
        }
    }
}
// See https://github.com/geneontology/go-ontology for details
// on the ontology release pipeline. This stage checks out
// go-ontology inside the ODK container, runs the `all` and
// `prepare_release` make targets, copies target/* to the branch's
// ontology/ area on skyhook, and then fails the run if a ROBOT core
// dump was produced (when WE_ARE_BEING_SAFE_P is 'TRUE').
// NOTE(review): the scp target below reads "[email protected]:...",
// which looks like a web-scrape e-mail-obfuscation placeholder rather
// than a real user@host -- confirm against the canonical file.
stage('Produce ontology (*)') {
    agent {
        docker {
            // Upgrade test for: geneontology/go-ontology#25019, from v1.2.32
            image 'obolibrary/odkfull:v1.4'
            // Reset Jenkins Docker agent default to original
            // root.
            args '-u root:root'
        }
    }
    // CHECKPOINT: Recover key environmental variables.
    // The values are read back from the branch's metadata files on
    // skyhook rather than taken from the earlier stages' environment.
    environment {
        START_DOW = sh(script: 'curl http://skyhook.berkeleybop.org/$BRANCH_NAME/metadata/dow.txt', returnStdout: true).trim()
        START_DATE = sh(script: 'curl http://skyhook.berkeleybop.org/$BRANCH_NAME/metadata/date.txt', returnStdout: true).trim()
    }
    steps {
        // Create a relative working directory and setup our
        // data environment.
        dir('./go-ontology') {
            // We're starting to run into problems with
            // ontology download taking too long for the
            // default 10m, so try and get into the guts of
            // the git commands a little. Issues #248.
            // git branch: TARGET_GO_ONTOLOGY_BRANCH, url: 'https://github.com/geneontology/go-ontology.git'
            // Shallow (depth 1), tag-less clone of just the target
            // branch, with the clone timeout raised to 120.
            checkout changelog: false, poll: false, scm: [$class: 'GitSCM', branches: [[name: TARGET_GO_ONTOLOGY_BRANCH]], extensions: [[$class: 'CloneOption', depth: 1, noTags: true, reference: '', shallow: true, timeout: 120]], userRemoteConfigs: [[url: 'https://github.com/geneontology/go-ontology.git', refspec: "+refs/heads/${env.TARGET_GO_ONTOLOGY_BRANCH}:refs/remotes/origin/${env.TARGET_GO_ONTOLOGY_BRANCH}"]]]
            // Default namespace.
            sh 'env'
            dir('./src/ontology') {
                // Each make target gets up to three attempts.
                retry(3){
                    sh 'make RELEASEDATE=$START_DATE OBO=http://purl.obolibrary.org/obo ROBOT_ENV="ROBOT_JAVA_ARGS=-Xmx48G" all'
                }
                retry(3){
                    sh 'make prepare_release'
                }
            }
            // Make sure that we copy any files there,
            // including the core dump of produced.
            withCredentials([file(credentialsId: 'skyhook-private-key', variable: 'SKYHOOK_IDENTITY')]) {
                //sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" target/* [email protected]:/home/skyhook/$BRANCH_NAME/ontology'
                sh 'scp -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY -r target/* [email protected]:/home/skyhook/$BRANCH_NAME/ontology/'
            }
            // Now that the files are safely away onto skyhook for
            // debugging, test for the core dump.
            script {
                if( WE_ARE_BEING_SAFE_P == 'TRUE' ){
                    def found_core_dump_p = fileExists 'target/core_dump.owl'
                    if( found_core_dump_p ){
                        error 'ROBOT core dump detected--bailing out.'
                    }
                }
            }
            // Try and force destruction of anything remaining
            // on disk after build as cleanup.
            // (`|| true` keeps a failed clean from failing the stage.)
            sh 'git clean -fx || true'
        }
    }
}
// Run two independent Minerva jobs in parallel, each on its own shallow
// checkout of noctua-models with the tools pulled back from skyhook:
// one converts the models to GPAD, the other dumps them as JSON. Both
// upload their result to the branch's products/ area on skyhook.
// NOTE(review): the rsync/scp endpoints below read "[email protected]:...",
// which looks like a web-scrape e-mail-obfuscation placeholder rather
// than a real user@host -- confirm against the canonical file.
stage('Minerva generations') {
steps {
parallel(
"Make Noctua GPAD": {
// May be parallelized in the future, but may need to
// serve as input into mega step.
script {
// Create a relative working directory and setup our
// data environment.
dir('./noctua-models') {
// Attempt to trim/prune/speed up
// noctua-models as we do for
// go-ontology for
// https://github.com/geneontology/pipeline/issues/278
// .
checkout changelog: false, poll: false, scm: [$class: 'GitSCM', branches: [[name: TARGET_NOCTUA_MODELS_BRANCH]], extensions: [[$class: 'CloneOption', depth: 1, noTags: true, reference: '', shallow: true, timeout: 120]], userRemoteConfigs: [[url: 'https://github.com/geneontology/noctua-models.git', refspec: "+refs/heads/${env.TARGET_NOCTUA_MODELS_BRANCH}:refs/remotes/origin/${env.TARGET_NOCTUA_MODELS_BRANCH}"]]]
// Make all software products
// available in bin/ (and lib/).
sh 'mkdir -p bin/'
sh 'mkdir -p lib/'
withCredentials([file(credentialsId: 'skyhook-private-key', variable: 'SKYHOOK_IDENTITY')]) {
sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" [email protected]:/home/skyhook/$BRANCH_NAME/bin/* ./bin/'
// WARNING/BUG: needed for blazegraph-runner
// to run at this point.
sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" [email protected]:/home/skyhook/$BRANCH_NAME/lib/* ./lib/'
}
sh 'chmod +x bin/*'
// Compile models.
sh 'mkdir -p legacy/gpad'
withEnv(['MINERVA_CLI_MEMORY=128G']){
// "Import" models.
sh './bin/minerva-cli.sh --import-owl-models -f models -j blazegraph.jnl'
// Convert GO-CAM to GPAD.
sh './bin/minerva-cli.sh --lego-to-gpad-sparql --ontology $MINERVA_INPUT_ONTOLOGIES --ontojournal ontojournal.jnl -i blazegraph.jnl --gpad-output legacy/gpad'
}
// Collation.
// Hack for iterating quickly on
// https://github.com/geneontology/pipeline/issues/313 .
// The collation script is fetched straight from the target
// go-site branch rather than from a local checkout.
sh 'wget -N https://raw.githubusercontent.com/geneontology/go-site/$TARGET_GO_SITE_BRANCH/scripts/collate-gpads.pl'
sh 'perl ./collate-gpads.pl legacy/gpad'
// Rename, compress, and move to skyhook.
sh 'mcp "legacy/*.gpad" "legacy/noctua_#1-src.gpad"'
// -k keeps the uncompressed copies next to the .gz files.
sh 'gzip -vk legacy/noctua_*.gpad'
withCredentials([file(credentialsId: 'skyhook-private-key', variable: 'SKYHOOK_IDENTITY')]) {
sh 'scp -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY legacy/noctua_*-src.gpad.gz [email protected]:/home/skyhook/$BRANCH_NAME/products/upstream_and_raw_data/'
}
}
}
},
"JSON model generation": {
// May be parallelized in the future, but may need to
// serve as input into mega step.
script {
// Create a relative working directory and setup our
// data environment.
dir('./json-noctua-models') {
// Attempt to trim/prune/speed up
// noctua-models as we do for
// go-ontology for
// https://github.com/geneontology/pipeline/issues/278
// .
checkout changelog: false, poll: false, scm: [$class: 'GitSCM', branches: [[name: TARGET_NOCTUA_MODELS_BRANCH]], extensions: [[$class: 'CloneOption', depth: 1, noTags: true, reference: '', shallow: true, timeout: 120]], userRemoteConfigs: [[url: 'https://github.com/geneontology/noctua-models.git', refspec: "+refs/heads/${env.TARGET_NOCTUA_MODELS_BRANCH}:refs/remotes/origin/${env.TARGET_NOCTUA_MODELS_BRANCH}"]]]
// Make all software products
// available in bin/ (and lib/).
sh 'mkdir -p bin/'
sh 'mkdir -p lib/'
withCredentials([file(credentialsId: 'skyhook-private-key', variable: 'SKYHOOK_IDENTITY')]) {
sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" [email protected]:/home/skyhook/$BRANCH_NAME/bin/* ./bin/'
// WARNING/BUG: needed for blazegraph-runner
// to run at this point.
sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" [email protected]:/home/skyhook/$BRANCH_NAME/lib/* ./lib/'
}
sh 'chmod +x bin/*'
// Compile models.
sh 'mkdir -p jsonout'
withEnv(['MINERVA_CLI_MEMORY=128G']){
// "Import" models.
sh './bin/minerva-cli.sh --import-owl-models -f models -j blazegraph.jnl'
// JSON out to directory.
// NOTE(review): nothing visible in this branch creates or
// fetches blazegraph-go-lego-reacto-neo.jnl -- confirm where
// the --ontojournal file comes from.
sh './bin/minerva-cli.sh --dump-owl-json --journal blazegraph.jnl --ontojournal blazegraph-go-lego-reacto-neo.jnl --folder jsonout'
}
// Compress and out.
sh 'tar --use-compress-program=pigz -cvf noctua-models-json.tgz -C jsonout .'
withCredentials([file(credentialsId: 'skyhook-private-key', variable: 'SKYHOOK_IDENTITY')]) {
sh 'scp -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY noctua-models-json.tgz [email protected]:/home/skyhook/$BRANCH_NAME/products/json/'
}
}
}
}
)
}
}
stage('Produce GAFs, TTLs, and journal (*)') {
agent {
docker {
image 'geneontology/dev-base:ea32b54c822f7a3d9bf20c78208aca452af7ee80_2023-08-28T125255'
args "-u root:root --tmpfs /opt:exec -w /opt"
}
}
// CHECKPOINT: Recover key environmental variables.
environment {
START_DOW = sh(script: 'curl http://skyhook.berkeleybop.org/$BRANCH_NAME/metadata/dow.txt', , returnStdout: true).trim()
START_DATE = sh(script: 'curl http://skyhook.berkeleybop.org/$BRANCH_NAME/metadata/date.txt', , returnStdout: true).trim()
}
steps {
// Legacy: build 'gaf-production'
sh "mkdir -p /opt/go-site"
sh "cd /opt/ && git clone -b $TARGET_GO_SITE_BRANCH https://github.com/geneontology/go-site.git"
// sh "pwd"
sh "mkdir -p /opt/bin"
sh "mkdir -p /opt/lib"
sh "mkdir -p /opt/go-site/gaferencer-products"
sh "mkdir -p /opt/go-site/gaferencer-products-tmp"
// git branch: TARGET_GO_SITE_BRANCH, url: 'https://github.com/geneontology/go-site.git'
withCredentials([file(credentialsId: 'skyhook-private-key', variable: 'SKYHOOK_IDENTITY')]) {
sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" [email protected]:/home/skyhook/$BRANCH_NAME/bin/* /opt/bin/'
// WARNING/BUG: needed for blazegraph-runner
// to run at this point.
sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" [email protected]:/home/skyhook/$BRANCH_NAME/lib/* /opt/lib/'
// Copy the sources we downloaded earlier to local.
// We're grabbing anything that's gaf, zipped or unzipped. This leaves gpad or anything else behind since currently we only expect gafs
sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" [email protected]:/home/skyhook/$BRANCH_NAME/products/upstream_and_raw_data/*.gaf* /opt/go-site/sources/'
}
sh "chmod +x /opt/bin/*"
// Install the python requirements.
sh "cd /opt/go-site/scripts && pip3 install -r requirements.txt"
// Re-establish location in the filesystem expected by steps below.
sh "cd /opt/"
sh "python3 /opt/go-site/scripts/download_source_gafs.py organize --datasets /opt/go-site/metadata/datasets --source /opt/go-site/sources --target /opt/go-site/pipeline/target/groups/"
sh "rm /opt/go-site/sources/*"
// Make minimal GAF products.
// sh "cd /opt/go-site/pipeline"
// sh "pwd"
// Gunna need some memory.
// In addition to the memory, try and simulate
// the environment changes for python venv activate.
// Note the complex assignment of VIRTUAL_ENV and PATH.
// https://jenkins.io/doc/pipeline/steps/workflow-basic-steps/#code-withenv-code-set-environment-variables
// "PATH+EXTRA=${WORKSPACE}/go-site/bin:${WORKSPACE}/go-site/pipeline/mypyenv/bin", 'PYTHONHOME=', "VIRTUAL_ENV=${WORKSPACE}/go-site/pipeline/mypyenv", 'PY_ENV=mypyenv', 'PY_BIN=mypyenv/bin'
withEnv(['JAVA_OPTS=-Xmx128G', 'OWLTOOLS_MEMORY=128G', 'BGMEM=128G', "ONTOLOGY=${VALIDATION_ONTOLOGY_URL}", "BRANCH_NAME=${BRANCH_NAME}"]){
// Note environment for future debugging.
// Note: https://issues.jenkins-ci.org/browse/JENKINS-53025 and
// https://issues.jenkins-ci.org/browse/JENKINS-49076
// Just shell out PATH does not work either
// sh 'export PATH=/opt/pipeline/bin:$PATH'
sh 'env > env.txt'
sh 'cat env.txt'
sh 'cd /opt/go-site/pipeline && pip3 install -r requirements.txt'
sh 'cd /opt/go-site/pipeline && pip3 install ../graphstore/rule-runner'
// Get a final accounting of software versions for
// run.
// https://github.com/geneontology/pipeline/issues/208
sh 'pip3 freeze --no-input > pip3_freeze.txt'
sh 'cat pip3_freeze.txt'
// Ready, set...
// Do this thing, but the watchdog sits
// waiting.
timeout(time: 20, unit: 'HOURS') {
script {
/// All branches now try to produce all
/// targets in the go-site Makefile.
sh 'cd /opt/go-site/pipeline && PATH=/opt/bin:$PATH $MAKECMD PY_BIN=/usr/local/bin/ -e target/sparta-report.json'
}
}
}
// Copy products over to skyhook.
withCredentials([file(credentialsId: 'skyhook-private-key', variable: 'SKYHOOK_IDENTITY')]) {
// All non-core GAFs to the side in
// products/gaf. Basically:
// - all irregular gaffy files + anything paint-y
// - but not uniprot_all anything (elsewhere)
// - and not any of the ttls
sh 'find /opt/go-site/pipeline/target/groups -type f -regex "^.*\\(\\-src.gaf\\|\\-src.gpi\\|\\_noiea.gaf\\|\\_valid.gaf\\|paint\\_.*\\).gz$" -not -regex "^.*.ttl.gz$" -not -regex "^.*goa_uniprot_all_noiea.gaf.gz$" -not -regex "^.*.ttl.gz$" -exec scp -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY {} [email protected]:/home/skyhook/$BRANCH_NAME/products/upstream_and_raw_data \\;'
// No longer copy goa uniprot all source to products:
// https://github.com/geneontology/pipeline/issues/207
// // Now copy over the (single) uniprot
// // non-core; may not be there in all runs
// // (e.g. speed runs of master).
// script {
// try {
// sh 'scp -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY /opt/go-site/pipeline/target/groups/goa/goa_uniprot_all-src.gaf.gz [email protected]:/home/skyhook/$BRANCH_NAME/products/upstream_and_raw_data'
// } catch (exception) {
// echo "NOTE: No goa_uniprot_all-src.gaf.gz found for this run to copy."
// }
// }
// Finally, the non-zipped prediction files.
sh 'find /opt/go-site/pipeline/target/groups -type f -regex "^.*\\-prediction.gaf$" -exec scp -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY {} [email protected]:/home/skyhook/$BRANCH_NAME/products/upstream_and_raw_data \\;'
// Flatten all GAFs and GAF-like products
// onto skyhook. Basically:
// - all product-y files
// - but not uniprot_all anything (elsewhere)
// - and not anything "irregular", like src
sh 'find /opt/go-site/pipeline/target/groups -type f -regex "^.*.\\(gaf\\|gpad\\|gpi\\).gz$" -not -regex "^.*\\(\\-src.gaf\\|\\-src.gpi\\|\\_noiea.gaf\\|\\_valid.gaf\\|noctua_.*\\|paint_.*\\).gz$" -exec scp -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY {} [email protected]:/home/skyhook/$BRANCH_NAME/annotations \\;'
// Now copy over the four uniprot core
// files, if they are in our run set
// (e.g. may not be there on speed runs
// for master).
script {
try {
// No longer copy goa uniprot all source to annotations:
// https://github.com/geneontology/pipeline/issues/207
//sh 'scp -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY /opt/go-site/pipeline/target/groups/goa/goa_uniprot_all.gaf.gz [email protected]:/home/skyhook/$BRANCH_NAME/annotations'
sh 'scp -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY /opt/go-site/pipeline/target/groups/goa/goa_uniprot_all_noiea.gaf.gz [email protected]:/home/skyhook/$BRANCH_NAME/annotations'
sh 'scp -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY /opt/go-site/pipeline/target/groups/goa/goa_uniprot_all_noiea.gpi.gz [email protected]:/home/skyhook/$BRANCH_NAME/annotations'
sh 'scp -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY /opt/go-site/pipeline/target/groups/goa/goa_uniprot_all_noiea.gpad.gz [email protected]:/home/skyhook/$BRANCH_NAME/annotations'
} catch (exception) {
echo "NOTE: At least one uniprot core file not found for this run to copy."
}
}
// Find all {group}.gaferences.json files and combine into one JSON list in one file
sh 'find /opt/go-site/pipeline/target/groups -type f -regex "^.*.gaferences.json$" -exec cp {} /opt/go-site/gaferencer-products-tmp/ \\;'
sh 'python3 /opt/go-site/scripts/json-concat-lists.py /opt/go-site/gaferencer-products-tmp/*.gaferences.json /opt/go-site/gaferencer-products/all.gaferences.json'
// DEBUG: remove debug line later
sh 'ls -AlF /opt/go-site/gaferencer-products'
sh 'pigz /opt/go-site/gaferencer-products/all.gaferences.json'
sh 'scp -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY /opt/go-site/gaferencer-products/all.gaferences.json.gz [email protected]:/home/skyhook/$BRANCH_NAME/products/gaferencer'
// Flatten the TTLs into products/ttl/.
sh 'find /opt/go-site/pipeline/target/groups -type f -name "*.ttl.gz" -exec scp -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY {} [email protected]:/home/skyhook/$BRANCH_NAME/products/ttl \\;'
// Compress the journals.
sh 'pigz /opt/go-site/pipeline/target/blazegraph-internal.jnl'
sh 'pigz /opt/go-site/pipeline/target/blazegraph-production.jnl'
// Copy the journals directly to products.
//sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" /opt/go-site/pipeline/target/blazegraph-production.jnl.gz [email protected]:/home/skyhook/$BRANCH_NAME/products/blazegraph/'
//sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" /opt/go-site/pipeline/target/blazegraph-internal.jnl.gz [email protected]:/home/skyhook/$BRANCH_NAME/products/blazegraph/'
sh 'scp -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY /opt/go-site/pipeline/target/blazegraph-production.jnl.gz [email protected]:/home/skyhook/$BRANCH_NAME/products/blazegraph/'
sh 'scp -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY /opt/go-site/pipeline/target/blazegraph-internal.jnl.gz [email protected]:/home/skyhook/$BRANCH_NAME/products/blazegraph/'
// Copy the reports into reports.
//sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" /opt/go-site/pipeline/target/sparta-report.json [email protected]:/home/skyhook/$BRANCH_NAME/reports/'
sh 'scp -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY /opt/go-site/pipeline/target/sparta-report.json [email protected]:/home/skyhook/$BRANCH_NAME/reports/'
// Plus: flatten product reports in json,
// md reports, text files, etc.
sh 'find /opt/go-site/pipeline/target/groups -type f -regex "^.*\\.\\(json\\|txt\\|md\\)$" -exec scp -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY {} [email protected]:/home/skyhook/$BRANCH_NAME/reports \\;'
script {
try {
// WARNING: This is a hacky fix for https://github.com/geneontology/go-site/issues/1253 .
// It can (should) be removed with an overall flow change in https://github.com/geneontology/go-site/issues/1384 .
sh 'find /opt/go-site/pipeline/target/groups/paint -type f -regex "^.*\\.\\(json\\|txt\\|md\\)$" -exec scp -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY {} [email protected]:/home/skyhook/$BRANCH_NAME/reports \\;'
} catch (exception) {
echo "NOTE: paint directory does not exist, so no reports to copy"
}
}
}
}
}
// WARNING: This stage is a hack required to work around data damage described in https://github.com/geneontology/go-site/issues/1484 and
// https://github.com/geneontology/pipeline/issues/220.
// Redownload annotations and run ontobio-parse-assocs over them in various ways.
stage('Temporary post filter') {
    // Purpose: re-download this branch's annotations and gaferencer
    // products from skyhook, reprocess them with
    // go-site/scripts/Makefile-gaf-reprocess, and upload the
    // results back to skyhook. Then do the same for the Noctua
    // source GPADs.
    agent {
        docker {
            image 'geneontology/dev-base:ea32b54c822f7a3d9bf20c78208aca452af7ee80_2023-08-28T125255'
            // Run as root, with /opt mounted as an exec-capable
            // tmpfs that is also the working directory.
            args "-u root:root --tmpfs /opt:exec -w /opt"
        }
    }
    steps {
        // Starting with https://github.com/geneontology/go-site/issues/1484,
        // prepare a working directory based around go-site.
        sh "cd /opt/ && git clone -b $TARGET_GO_SITE_BRANCH https://github.com/geneontology/go-site.git"
        sh "mkdir -p /opt/go-site/annotations /opt/go-site/annotations_new /opt/go-site/gaferencer-products"
        sh "cd /opt/go-site/pipeline && pip3 install -r requirements.txt"
        // Download gaferencer products and /annotations
        withCredentials([file(credentialsId: 'skyhook-private-key', variable: 'SKYHOOK_IDENTITY')]) {
            sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" skyhook@skyhook.berkeleybop.org:/home/skyhook/$BRANCH_NAME/annotations/* /opt/go-site/annotations/'
            // Get rid of goa_uniprot_all_noiea-type products
            // as they take too long to run.
            sh 'rm -f /opt/go-site/annotations/*uniprot_all* || true'
            sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" skyhook@skyhook.berkeleybop.org:/home/skyhook/$BRANCH_NAME/products/gaferencer/all.gaferences.json.gz /opt/go-site/gaferencer-products/'
            // Reprocess the downloaded annotations; the results
            // are uploaded from annotations_new/ below.
            //sh "$MAKECMD -f /opt/go-site/scripts/Makefile-gaf-reprocess all"
            sh "make -f /opt/go-site/scripts/Makefile-gaf-reprocess all"
            // Upload the reprocessed files into the branch's
            // annotations/ directory on skyhook.
            sh 'scp -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY /opt/go-site/annotations_new/* skyhook@skyhook.berkeleybop.org:/home/skyhook/$BRANCH_NAME/annotations'
            // sh 'scp -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY /opt/go-site/gaferencer-products/all.gaferences.json.gz skyhook@skyhook.berkeleybop.org:/home/skyhook/$BRANCH_NAME/products/gaferencer/gaferences.json.gz'
            // From here, we are making corrections to the Noctua
            // GPADs (https://github.com/geneontology/pipeline/issues/220) to fix errors that are
            // apparent in the model upstream.
            sh "mkdir -p /opt/go-site/noctua_sources /opt/go-site/noctua_target"
            // Download the source Noctua files
            // (noctua_*-src.gpad.gz) from
            // products/upstream_and_raw_data/ in skyhook.
            sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" skyhook@skyhook.berkeleybop.org:/home/skyhook/$BRANCH_NAME/products/upstream_and_raw_data/noctua_*-src.gpad.gz /opt/go-site/noctua_sources/'
            // Do we need GPI files for GO Rules? Maybe? Try and see if these are needed for GO Rules.
            // Run the noctua gpad through ontobio, with ONTOLOGY
            // set from VALIDATION_ONTOLOGY_URL for the make
            // target.
            withEnv(["ONTOLOGY=${VALIDATION_ONTOLOGY_URL}"]){
                sh "make -f /opt/go-site/scripts/Makefile-gaf-reprocess noctua_gpad"
            }
            // Upload result files to skyhook:
            // the processed Noctua GPADs...
            sh 'scp -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY /opt/go-site/noctua_target/noctua*.gpad.gz skyhook@skyhook.berkeleybop.org:/home/skyhook/$BRANCH_NAME/products/upstream_and_raw_data'
            // ...and their reports.
            sh 'scp -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY /opt/go-site/noctua_target/*.report.* skyhook@skyhook.berkeleybop.org:/home/skyhook/$BRANCH_NAME/reports'
        }
    }
}
// A new step to think about. What is our core metadata?
stage('Produce metadata') {
    // Purpose: mount skyhook over sshfs, stage the branch's
    // annotations and reports locally, generate the combined
    // reports, static pages, and metadata derivatives from a
    // go-site checkout, push them back to skyhook, and finally
    // run the shared annotation check. The sshfs mount and the
    // local copyover directory are always cleaned up in post.
    steps {
        // Prep a copyover point, as the overhead for doing
        // large i/o over sshfs seems /really/ high.
        sh 'mkdir -p $WORKSPACE/copyover/ || true'
        // Mount the remote filesystem.
        sh 'mkdir -p $WORKSPACE/mnt/ || true'
        withCredentials([file(credentialsId: 'skyhook-private-key', variable: 'SKYHOOK_IDENTITY')]) {
            sh 'sshfs -oStrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY -o idmap=user skyhook@skyhook.berkeleybop.org:/home/skyhook $WORKSPACE/mnt/'
        }
        // Copy over the files that we want to work on--both
        // annotations/ and reports/ (which we separated
        // earlier).
        sh 'cp $WORKSPACE/mnt/$BRANCH_NAME/annotations/* $WORKSPACE/copyover/'
        sh 'cp $WORKSPACE/mnt/$BRANCH_NAME/reports/* $WORKSPACE/copyover/'
        script {
            try {
                sh 'cp $WORKSPACE/mnt/$BRANCH_NAME/products/upstream_and_raw_data/paint_* $WORKSPACE/copyover/'
            } catch (exception) {
                // No PAINT files this run? It could happen if
                // on a limited run with only non-PAINT
                // resources involved (e.g. speed run master).
                echo "NOTE: No PAINT files were found for this run to copy."
            }
        }
        // Make all software products available in bin/.
        sh 'mkdir -p $WORKSPACE/bin/ || true'
        withCredentials([file(credentialsId: 'skyhook-private-key', variable: 'SKYHOOK_IDENTITY')]) {
            sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" skyhook@skyhook.berkeleybop.org:/home/skyhook/$BRANCH_NAME/bin/* $WORKSPACE/bin/'
        }
        sh 'chmod +x $WORKSPACE/bin/*'
        // Prepare a working directory based around go-site.
        dir('./go-site') {
            git branch: TARGET_GO_SITE_BRANCH, url: 'https://github.com/geneontology/go-site.git'
            // Generate interesting PANTHER information
            // (.arbre files) based on upstream source.
            sh 'wget -N http://data.pantherdb.org/PANTHER$PANTHER_VERSION/globals/tree_files.tar.gz'
            sh 'wget -N http://data.pantherdb.org/PANTHER$PANTHER_VERSION/globals/names.tab'
            sh 'tar -zxvf tree_files.tar.gz'
            sh 'python3 ./scripts/prepare-panther-arbre-directory.py -v --names names.tab --trees tree_files --output arbre'
            sh 'tar --use-compress-program=pigz -cvf arbre.tgz -C arbre .'
            // Written straight through the sshfs mount.
            sh 'mv arbre.tgz $WORKSPACE/mnt/$BRANCH_NAME/products/panther'
            // Generate combined annotation and assigned-by combined report for driving
            // annotation download pages and drop it into
            // reports/ for copyover.
            sh 'python3 ./scripts/aggregate-json-reports.py -v --directory $WORKSPACE/copyover --metadata ./metadata/datasets --output ./combined.report.json'
            sh 'python3 ./scripts/combined_assigned_by.py -v --input ./combined.report.json --output ./assigned-by-combined-report.json'
            // Generate the static download page directly from
            // the metadata.
            sh 'python3 ./scripts/downloads-page-gen.py -v --report ./combined.report.json --date $START_DATE --inject ./scripts/downloads-page-template.html > ./downloads.html'
            // Generate a users.yaml report for missing
            // data in the GO pattern.
            sh 'python3 ./scripts/sanity-check-users-and-groups.py --users metadata/users.yaml --groups metadata/groups.yaml > ./users-and-groups-report.txt'
            // WARNING: Caveats and reasons as above. This
            // started because we need to process frontmatter
            // using our in-house "yamldown" parser.
            sh 'python3 -m venv mypyenv'
            withEnv(["PATH+EXTRA=${WORKSPACE}/go-site/bin:${WORKSPACE}/go-site/mypyenv/bin", 'PYTHONHOME=', "VIRTUAL_ENV=${WORKSPACE}/go-site/mypyenv", 'PY_ENV=mypyenv', 'PY_BIN=mypyenv/bin']){
                // "External" packages required to run these
                // scripts.
                sh 'python3 ./mypyenv/bin/pip3 install --force-reinstall click==7.1.2'
                sh 'python3 ./mypyenv/bin/pip3 install --force-reinstall pystache==0.5.4'
                sh 'python3 ./mypyenv/bin/pip3 install yamldown'
                sh 'python3 ./mypyenv/bin/pip3 install pypandoc'
                // Generate the static overall gorule report
                // page and its assigned-by counterpart. The
                // --suppress-rule-tag option is only passed
                // when GORULE_TAGS_TO_SUPPRESS is set and
                // non-empty.
                script {
                    if( env.GORULE_TAGS_TO_SUPPRESS && env.GORULE_TAGS_TO_SUPPRESS != "" ){
                        sh 'python3 ./scripts/reports-page-gen.py --report ./combined.report.json --template ./scripts/reports-page-template.html --date $START_DATE --suppress-rule-tag $GORULE_TAGS_TO_SUPPRESS > gorule-report.html'
                        sh 'python3 ./scripts/reports-page-gen.py --report ./assigned-by-combined-report.json --template ./scripts/assigned-by-reports-page-template.html --date $START_DATE --suppress-rule-tag $GORULE_TAGS_TO_SUPPRESS > assigned-by-gorule-report.html'
                    }else{
                        sh 'python3 ./scripts/reports-page-gen.py --report ./combined.report.json --template ./scripts/reports-page-template.html --date $START_DATE > gorule-report.html'
                        sh 'python3 ./scripts/reports-page-gen.py --report ./assigned-by-combined-report.json --template ./scripts/assigned-by-reports-page-template.html --date $START_DATE > assigned-by-gorule-report.html'
                    }
                }
                // Generate the new GO refs data.
                sh 'python3 ./scripts/aggregate-references.py -v --directory ./metadata/gorefs --json ./metadata/go-refs.json --stanza ./metadata/GO.references'
            }
            // Get the date into the metadata, in a similar format
            // to what is produced by the Zenodo sections. The
            // five echo commands below build a small JSON
            // object with a single "date" key, piece by piece.
            sh 'echo \'{\' > ./metadata/release-date.json'
            sh 'echo -n \' "date": "\' >> ./metadata/release-date.json'
            sh 'echo -n "$START_DATE" >> ./metadata/release-date.json'
            sh 'echo \'"\' >> ./metadata/release-date.json'
            sh 'echo \'}\' >> ./metadata/release-date.json'
            // Some scripts that require NPM.
            withEnv(['PATH+EXTRA=../bin:node_modules/.bin']){
                sh 'npm install'
                // Generate the TTL from users.yaml and
                // groups.yaml. This is meant to be an
                // unwinding of the somewhat too hard-coded
                // go-site/scripts/yaml2turtle.sh from Jim.
                // Each JSON-LD file is assembled as
                // {"@context": <context>, "@graph": <data>}
                // and then converted with robot.
                //sh 'GRPTEMP=`mktemp --tmpdir=. --suffix=.jsonld`'
                sh 'echo \'{"@context": \' > ./metadata/groups.tmp.jsonld'
                sh 'yaml2json ./metadata/users-groups-context.yaml >> ./metadata/groups.tmp.jsonld'
                sh 'echo \', "@graph": \' >> ./metadata/groups.tmp.jsonld'
                sh 'yaml2json metadata/groups.yaml >> ./metadata/groups.tmp.jsonld'
                sh 'echo \'}\' >> ./metadata/groups.tmp.jsonld'
                sh 'robot convert -i ./metadata/groups.tmp.jsonld -o ./metadata/groups.ttl'
                //sh 'USRTEMP=`mktemp --tmpdir=. --suffix=.jsonld`'
                sh 'echo \'{"@context": \' > ./metadata/users.tmp.jsonld'
                sh 'yaml2json ./metadata/users-groups-context.yaml >> ./metadata/users.tmp.jsonld'
                sh 'echo \', "@graph": \' >> ./metadata/users.tmp.jsonld'
                sh 'yaml2json metadata/users.yaml >> ./metadata/users.tmp.jsonld'
                sh 'echo \'}\' >> ./metadata/users.tmp.jsonld'
                sh 'robot convert -i ./metadata/users.tmp.jsonld -o ./metadata/users.ttl'
                // Convert db-xrefs into the legacy xrefs
                // formats.
                sh 'yaml2json -p ./metadata/db-xrefs.yaml > ./metadata/db-xrefs.json'
                sh 'node ./scripts/db-xrefs-yaml2legacy.js -i ./metadata/db-xrefs.yaml > ./metadata/db-xrefs.legacy'
                sh 'cp ./metadata/db-xrefs.legacy ./metadata/GO.xrf_abbs'
                // Constraints for Alex.
                sh 'yaml2json -p ./metadata/eco-usage-constraints.yaml > ./metadata/eco-usage-constraints.json'
            }
            // Carry everything we want to save over to
            // skyhook.
            withCredentials([file(credentialsId: 'skyhook-private-key', variable: 'SKYHOOK_IDENTITY')]) {
                // Copy all upstream metadata into metadata folder.
                sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" metadata/* skyhook@skyhook.berkeleybop.org:/home/skyhook/$BRANCH_NAME/metadata'
                // Copy all of the reports to the reports
                // directory.
                sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" ./combined.report.json skyhook@skyhook.berkeleybop.org:/home/skyhook/$BRANCH_NAME/reports'
                // NOTE(review): no step visible in this stage
                // names aggregate-rule-violation-report.md as
                // an output; presumably one of the report
                // scripts above writes it--confirm.
                sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" ./aggregate-rule-violation-report.md skyhook@skyhook.berkeleybop.org:/home/skyhook/$BRANCH_NAME/reports'
                sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" ./users-and-groups-report.txt skyhook@skyhook.berkeleybop.org:/home/skyhook/$BRANCH_NAME/reports'
                // Copy generated pages over to page output.
                sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" ./downloads.html skyhook@skyhook.berkeleybop.org:/home/skyhook/$BRANCH_NAME/products/pages'
                // Copy gorule report page to the reports directory
                sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" ./gorule-report.html skyhook@skyhook.berkeleybop.org:/home/skyhook/$BRANCH_NAME/reports'
                // Copy overall assigned-by pages to the
                // reports directory
                sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" ./assigned-by-*.* skyhook@skyhook.berkeleybop.org:/home/skyhook/$BRANCH_NAME/reports'
            }
            // Produce the slightly improved combined reports
            // inplace on remote.
            sh 'python3 ./scripts/merge-all-reports.py --verbose --directory $WORKSPACE/mnt/$BRANCH_NAME/reports'
        }
        // Run and report shared annotation check.
        dir('./shared-annotation-check') {
            git url: 'https://github.com/geneontology/shared-annotation-check.git'
            // Setup.
            withEnv(['PATH+EXTRA=../bin:node_modules/.bin']){
                sh 'npm install'
                // Run annotation checks; the HTML report is
                // written straight through the sshfs mount.
                sh 'node ./check-runner.js -i ./rules.txt -o $WORKSPACE/mnt/$BRANCH_NAME/reports/shared-annotation-check.html'
            }
        }
    }
    // WARNING: Extra safety as I expect this to sometimes fail.
    post {
        always {
            // Bail on the remote filesystem.
            sh 'fusermount -u $WORKSPACE/mnt/ || true'
            // Purge the copyover point.
            sh 'rm -r -f $WORKSPACE/copyover || true'
        }
    }
}
stage('Sanity I') {
    // Purpose: mount skyhook over sshfs, stage the branch's
    // annotations, upstream/raw data, and reports locally, then
    // run the go-site annotation-report sanity check and verify
    // the SPARTA report's build status. The sshfs mount and the
    // local copyover directory are always cleaned up in post.
    steps {
        // Prep a copyover point, as the overhead for doing
        // large i/o over sshfs seems /really/ high.
        sh 'mkdir -p $WORKSPACE/copyover/ || true'
        // Mount the remote filesystem.
        sh 'mkdir -p $WORKSPACE/mnt/ || true'
        withCredentials([file(credentialsId: 'skyhook-private-key', variable: 'SKYHOOK_IDENTITY')]) {
            sh 'sshfs -oStrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY -o idmap=user skyhook@skyhook.berkeleybop.org:/home/skyhook $WORKSPACE/mnt/'
        }
        // Copy over the files that we want to work on:
        // annotations/, products/upstream_and_raw_data/, and
        // reports/ (which we separated earlier).
        sh 'cp $WORKSPACE/mnt/$BRANCH_NAME/annotations/* $WORKSPACE/copyover/'
        sh 'cp $WORKSPACE/mnt/$BRANCH_NAME/products/upstream_and_raw_data/* $WORKSPACE/copyover/'
        sh 'cp $WORKSPACE/mnt/$BRANCH_NAME/reports/* $WORKSPACE/copyover/'
        // Ready...
        dir('./go-site') {
            git branch: TARGET_GO_SITE_BRANCH, url: 'https://github.com/geneontology/go-site.git'
            // Run sanity checks.
            sh 'python3 ./scripts/sanity-check-ann-report.py -v -d $WORKSPACE/copyover/ --ignore_noctua'
            // Make sure that the SPARTA report has nothing
            // nasty in it.
            // Note: Used to be pipes (|), but Jenkins Pipeline shell
            // commands do not apparently respect that.
            // The .build value is written to a file first;
            // grep -v then exits non-zero (failing this
            // step) when every line of that file contains
            // 'fail'.
            sh 'jq \'.build\' $WORKSPACE/copyover/sparta-report.json > $WORKSPACE/build-status.txt'
            sh 'grep -v \'fail\' $WORKSPACE/build-status.txt'
        }
    }
    // WARNING: Extra safety as I expect this to sometimes fail.
    post {
        always {
            // Bail on the remote filesystem.
            sh 'fusermount -u $WORKSPACE/mnt/ || true'
            // Purge the copyover point.
            sh 'rm -r -f $WORKSPACE/copyover || true'
        }
    }
}
//...
stage('Produce derivatives (*)') {
agent {
docker {
image 'geneontology/golr-autoindex:28a693d28b37196d3f79acdea8c0406c9930c818_2022-03-17T171930_master'
// Reset Jenkins Docker agent default to original
// root.
args '-u root:root --mount type=tmpfs,destination=/srv/solr/data'
}
}
// CHECKPOINT: Recover key environmental variables.
environment {
START_DOW = sh(script: 'curl http://skyhook.berkeleybop.org/$BRANCH_NAME/metadata/dow.txt', , returnStdout: true).trim()
START_DATE = sh(script: 'curl http://skyhook.berkeleybop.org/$BRANCH_NAME/metadata/date.txt', , returnStdout: true).trim()
}
steps {
// Build index into tmpfs.
sh 'bash /tmp/run-indexer.sh'
// Immediately check to see if it looks like we have
// enough docs when trying a
// release. The number of docs seen in the index must
// be at least SANITY_SOLR_DOC_COUNT_MIN.
script {
if( env.BRANCH_NAME == 'release' ){
// Test overall.
echo "SANITY_SOLR_DOC_COUNT_MIN:${env.SANITY_SOLR_DOC_COUNT_MIN}"
sh 'curl "http://localhost:8080/solr/select?q=*:*&rows=0&wt=json"'
sh 'if [ $SANITY_SOLR_DOC_COUNT_MIN -gt $(curl "http://localhost:8080/solr/select?q=*:*&rows=0&wt=json" | grep -oh \'"numFound":[[:digit:]]*\' | grep -oh [[:digit:]]*) ]; then exit 1; else echo "We seem to be clear wrt doc count"; fi'
// Test bioentity.
echo "SANITY_SOLR_BIOENTITY_DOC_COUNT_MIN:${env.SANITY_SOLR_BIOENTITY_DOC_COUNT_MIN}"
sh 'curl "http://localhost:8080/solr/select?q=*:*&rows=0&wt=json&fq=document_category:bioentity"'
sh 'if [ $SANITY_SOLR_BIOENTITY_DOC_COUNT_MIN -gt $(curl "http://localhost:8080/solr/select?q=*:*&rows=0&wt=json&fq=document_category:bioentity" | grep -oh \'"numFound":[[:digit:]]*\' | grep -oh [[:digit:]]*) ]; then exit 1; else echo "We seem to be clear wrt doc count"; fi'
}
}
// Copy tmpfs Solr contents onto skyhook.
sh 'tar --use-compress-program=pigz -cvf /tmp/golr-index-contents.tgz -C /srv/solr/data/index .'
withCredentials([file(credentialsId: 'skyhook-private-key', variable: 'SKYHOOK_IDENTITY')]) {
// Copy over index.
// Copy over log.
//sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" /tmp/golr-index-contents.tgz [email protected]:/home/skyhook/$BRANCH_NAME/products/solr/'
//sh 'rsync -avz -e "ssh -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY" /tmp/golr_timestamp.log [email protected]:/home/skyhook/$BRANCH_NAME/products/solr/'
sh 'scp -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY /tmp/golr-index-contents.tgz [email protected]:/home/skyhook/$BRANCH_NAME/products/solr/'
sh 'scp -o StrictHostKeyChecking=no -o IdentitiesOnly=true -o IdentityFile=$SKYHOOK_IDENTITY /tmp/golr_timestamp.log [email protected]:/home/skyhook/$BRANCH_NAME/products/solr/'
}
// Solr should still be running in the background here
// from indexing--create stats products from running
// GOlr.
// Prepare a working directory based around go-stats.
dir('./go-stats') {
git branch: TARGET_GO_STATS_BRANCH, url: 'https://github.com/geneontology/go-stats.git'
// Not much want or need here--simple
// python3. However, using the information hidden
// in run-indexer.sh to know where the Solr
// instance is hiding.
sh 'mkdir -p /tmp/stats/ || true'
sh 'cp ./libraries/go-stats/*.py /tmp'
// Needed as extra library.
sh 'pip3 install --force-reinstall requests==2.19.1'