From 22ea33292bb4556793a974d0506ffd9fe99333ab Mon Sep 17 00:00:00 2001 From: aditikp-07 Date: Tue, 11 Jul 2023 10:06:11 -0400 Subject: [PATCH 01/15] added quotes --- paperingest | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/paperingest b/paperingest index a09f557..d3f05ff 100755 --- a/paperingest +++ b/paperingest @@ -1,7 +1,7 @@ #!/bin/bash REQUIRECONFIG="Y" -SCRIPTDIR=$(dirname "${0}") +SCRIPTDIR="$(dirname "${0}")" . "${SCRIPTDIR}/mmfunctions" || { echo "Missing '${SCRIPTDIR}/mmfunctions'. Exiting." ; exit 1 ;}; _cleanup(){ @@ -40,9 +40,9 @@ open -a /Applications/Preview.app/ "${FIRST}" "${ORIGDIR}/${LAST}" END=$(date -u "+%Y%m%dT%H%M%SZ") SYSTEM_DATA=$(system_profiler SPHardwareDataType) #These retrieved the right info on the Mac I'm using, but I don't know how standard the output is -SERIAL_NUMBER=$(echo "${SYSTEM_DATA}" | grep "Serial Number" | awk '{ print $4 }') -MODEL=$(echo "${SYSTEM_DATA}" | grep "Model Identifier" | awk '{ print $3; }') OS=$(system_profiler SPSoftwareDataType | grep "System Version" | awk '{ print substr(${0}, index(${0},$3)); }') +SERIAL_NUMBER="$(echo "${SYSTEM_DATA}" | grep "Serial Number" | awk '{ print $4 }')" +MODEL="$(echo "${SYSTEM_DATA}" | grep "Model Identifier" | awk '{ print $3; }')" echo "datetime_start: ${START}" >> "${LOGDIR}/capture.log" echo "datetime_end: ${END}" >> "${LOGDIR}/capture.log" echo "serial number: ${SERIAL_NUMBER}" >> "${LOGDIR}/capture.log" From 8efbc3282d25999de7e38f14e0cf665abf698e75 Mon Sep 17 00:00:00 2001 From: aditikp-07 Date: Tue, 11 Jul 2023 10:07:17 -0400 Subject: [PATCH 02/15] fixed preview of first and last tiff --- paperingest | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paperingest b/paperingest index d3f05ff..bc38ac9 100755 --- a/paperingest +++ b/paperingest @@ -36,7 +36,7 @@ exec &> "${LOGDIR}/fscanx_process.txt" eval "${COMMAND}" FIRST=$(find "${ORIGDIR}" -type f -mindepth 1 -maxdepth 1 ! -name ".*" -exec ls -1rt '{}' \; | head -n 1) LAST=$(ls -1t "${ORIGDIR}" | head -n 1) -open -a /Applications/Preview.app/ "${FIRST}" "${ORIGDIR}/${LAST}" +open "${FIRST}" "${ORIGDIR}/${LAST}" END=$(date -u "+%Y%m%dT%H%M%SZ") SYSTEM_DATA=$(system_profiler SPHardwareDataType) #These retrieved the right info on the Mac I'm using, but I don't know how standard the output is From f0a5f9a3e63bb3d51b02006711ef83ec65c2da92 Mon Sep 17 00:00:00 2001 From: aditikp-07 Date: Tue, 11 Jul 2023 10:07:58 -0400 Subject: [PATCH 03/15] fixed awk error --- paperingest | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paperingest b/paperingest index bc38ac9..eb7c851 100755 --- a/paperingest +++ b/paperingest @@ -40,9 +40,9 @@ open "${FIRST}" "${ORIGDIR}/${LAST}" END=$(date -u "+%Y%m%dT%H%M%SZ") SYSTEM_DATA=$(system_profiler SPHardwareDataType) #These retrieved the right info on the Mac I'm using, but I don't know how standard the output is -OS=$(system_profiler SPSoftwareDataType | grep "System Version" | awk '{ print substr(${0}, index(${0},$3)); }') SERIAL_NUMBER="$(echo "${SYSTEM_DATA}" | grep "Serial Number" | awk '{ print $4 }')" MODEL="$(echo "${SYSTEM_DATA}" | grep "Model Identifier" | awk '{ print $3; }')" +OS="$(system_profiler SPSoftwareDataType | grep "System Version" | cut -d ":" -f 2- | awk '{$1=$1;print}')" echo "datetime_start: ${START}" >> "${LOGDIR}/capture.log" echo "datetime_end: ${END}" >> "${LOGDIR}/capture.log" echo "serial number: ${SERIAL_NUMBER}" >> "${LOGDIR}/capture.log" From 14e79c07b1f63675b847d2a574bee3873775592b Mon Sep 17 00:00:00 2001 From: aditikp-07 Date: Tue, 11 Jul 2023 10:12:46 -0400 Subject: [PATCH 04/15] created function to scan a page --- paperingest | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/paperingest b/paperingest index eb7c851..c0eaada 100755 --- a/paperingest +++ b/paperingest @@ -16,6 +16,11 @@ _ask_operator _ask_mediaid +_scan_page(){ + COMMAND="fscanx "${MIDDLE_OPTIONS[@]}" --rgb --bits 8 --resolution 600 --auto-length --paper-width 10200 --paper-height 13200 --rotate-n-n --left 0 --width 10200 --top 0 --height 13200 --double-feed n --tiff --no-jpeg --images-per-file 1 --compress zlib '${ORIGDIR}/${MEDIAID}_${COUNTER}.tif'" + exec & >> "${LOGDIR}/fscanx_process.txt" + eval "${COMMAND}" +} if [ -d "${OUTDIR_PAPER}/${MEDIAID}" ] ; then _report -wdt "It looks like this ${MEDIAID} was already scanned. If you want to overwrite the existing one please delete ${MEDIAID} first and then try again." exit @@ -31,9 +36,6 @@ mkdir -p "${LOGDIR}" START=$(date -u "+%Y%m%dT%H%M%SZ") -COMMAND="fscanx --adf --rgb --bits 8 --resolution 600 --paper-width 10200 --paper-height 13200 --rotate-n-n --left 0 --width 10200 --top 0 --height 13200 --double-feed n --tiff --no-jpeg --images-per-file 1 --compress zlib '${ORIGDIR}/${MEDIAID}_.tif'" -exec &> "${LOGDIR}/fscanx_process.txt" -eval "${COMMAND}" FIRST=$(find "${ORIGDIR}" -type f -mindepth 1 -maxdepth 1 ! -name ".*" -exec ls -1rt '{}' \; | head -n 1) LAST=$(ls -1t "${ORIGDIR}" | head -n 1) open "${FIRST}" "${ORIGDIR}/${LAST}" From 69f84c46a9954602f9b6344b7d4506cef1bfed34 Mon Sep 17 00:00:00 2001 From: aditikp-07 Date: Tue, 11 Jul 2023 10:13:53 -0400 Subject: [PATCH 05/15] created functions for users to input scan type and no of pages --- paperingest | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/paperingest b/paperingest index c0eaada..e0b88a0 100755 --- a/paperingest +++ b/paperingest @@ -9,6 +9,23 @@ _cleanup(){ exit 1 } +_ask_scantype(){ + if [ -z "${SCANTYPE}" ] ; then + _report -qn "Enter 'a' to use ADF or 'f' to use flatbed or 'q' to quit: " + read -e SCANTYPE + [ -z "${SCANTYPE}" ] && _ask_scantype + [[ "${SCANTYPE}" = "q" ]] && exit 0 + fi +} + +_ask_doubleside(){ + if [ -z "${DOUBLESIDE}" ] ; then + _report -qn "Enter 1 if scanning only front or 2 if scanning both front and back or 'q' to quit: " + read -e DOUBLESIDE + [ -z "${DOUBLESIDE}" ] && _ask_doubleside + [[ "${DOUBLESIDE}" = "q" ]] && exit 0 + fi +} trap _cleanup SIGHUP SIGINT SIGTERM _log -b From 638034a8597af99ce51b3f422c29e23359f0a4a1 Mon Sep 17 00:00:00 2001 From: aditikp-07 Date: Tue, 11 Jul 2023 10:15:07 -0400 Subject: [PATCH 06/15] created function to remove trailing 1 in flatbed scanned tiffs --- paperingest | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/paperingest b/paperingest index e0b88a0..311759e 100755 --- a/paperingest +++ b/paperingest @@ -51,6 +51,10 @@ LOGDIR="${OUTDIR_PAPER}/${MEDIAID}/metadata/logs" mkdir -p "${ORIGDIR}" mkdir -p "${LOGDIR}" +_file_rename(){ + for file in ${ORIGDIR}/*1.tif ; do mv -v -n "${file}" "${file//1.tif/.tiff}" ; done + +} START=$(date -u "+%Y%m%dT%H%M%SZ") FIRST=$(find "${ORIGDIR}" -type f -mindepth 1 -maxdepth 1 ! -name ".*" -exec ls -1rt '{}' \; | head -n 1) From 0d2b79b5f9b82687dd434a6aa67afda0aa9a1442 Mon Sep 17 00:00:00 2001 From: aditikp-07 Date: Tue, 11 Jul 2023 10:17:39 -0400 Subject: [PATCH 07/15] block to scan either flatbed or through ADF --- paperingest | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/paperingest b/paperingest index 311759e..2136e5d 100755 --- a/paperingest +++ b/paperingest @@ -57,6 +57,36 @@ _file_rename(){ } START=$(date -u "+%Y%m%dT%H%M%SZ") +if [[ "${SCANTYPE}" == "a" ]] ; then + MIDDLE_OPTIONS+=(--adf) + _ask_doubleside + if [[ "${DOUBLESIDE}" == 2 ]] ; then + MIDDLE_OPTIONS+=(--duplex) + elif [[ "${DOUBLESIDE}" == 1 ]] ; then + break + else + _report -w "You said ${DOUBLESIDE} for the number of pages which is not valid, use 1 or 2." + fi + _scan_page +elif [[ "${SCANTYPE}" == "f" ]] ; then + MIDDLE_OPTIONS+=(--flatbed) + COUNTER=1 + _report -d -n "Hit enter to scan a page or q to stop scanning pages: " + read PAGE_ANSWER + + while [[ ! "${PAGE_ANSWER}" = "q" ]] ; do + _scan_page + _file_rename + ((COUNTER++)) + _report -d -n "Hit enter to scan a page or q to stop scanning pages (next page is ${COUNTER}): " + read PAGE_ANSWER + done + +else + _report -w "You said ${SCANTYPE} for the scantype which is not valid, use 'a' or 'f'." + exit 1 +fi + FIRST=$(find "${ORIGDIR}" -type f -mindepth 1 -maxdepth 1 ! -name ".*" -exec ls -1rt '{}' \; | head -n 1) LAST=$(ls -1t "${ORIGDIR}" | head -n 1) open "${FIRST}" "${ORIGDIR}/${LAST}" From e9d91f2033ae7ec8a5c50d247254e60eb4f9c921 Mon Sep 17 00:00:00 2001 From: aditikp-07 Date: Tue, 11 Jul 2023 10:18:13 -0400 Subject: [PATCH 08/15] ask user input for scan type and added it to log --- paperingest | 2 ++ 1 file changed, 2 insertions(+) diff --git a/paperingest b/paperingest index 2136e5d..5fcfdae 100755 --- a/paperingest +++ b/paperingest @@ -33,6 +33,7 @@ _ask_operator _ask_mediaid +_ask_scantype _scan_page(){ COMMAND="fscanx "${MIDDLE_OPTIONS[@]}" --rgb --bits 8 --resolution 600 --auto-length --paper-width 10200 --paper-height 13200 --rotate-n-n --left 0 --width 10200 --top 0 --height 13200 --double-feed n --tiff --no-jpeg --images-per-file 1 --compress zlib '${ORIGDIR}/${MEDIAID}_${COUNTER}.tif'" exec & >> "${LOGDIR}/fscanx_process.txt" @@ -103,6 +104,7 @@ echo "model id: ${MODEL}" >> "${LOGDIR}/capture.log" echo "os: ${OS}" >> "${LOGDIR}/capture.log" echo "identifier: ${MEDIAID}" >> "${LOGDIR}/capture.log" echo "operator: ${OP}" >> "${LOGDIR}/capture.log" +echo "scantype: ${SCANTYPE}" >> "${LOGDIR}/capture.log" echo "command: ${COMMAND}" >> "${LOGDIR}/capture.log" echo done scanning "${MEDIAID}" From 8c654c2f9b4d74b693c195ee25a1c402acc92828 Mon Sep 17 00:00:00 2001 From: aditikp-07 Date: Tue, 11 Jul 2023 15:36:01 -0400 Subject: [PATCH 09/15] ask user for document type, store it and add it to log --- paperingest | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/paperingest b/paperingest index 5fcfdae..9675054 100755 --- a/paperingest +++ b/paperingest @@ -26,6 +26,15 @@ _ask_doubleside(){ [[ "${DOUBLESIDE}" = "q" ]] && exit 0 fi } + +_ask_doctype(){ + if [ -z "${DOCTYPE}" ] ; then + _report -qn "If document is from typewriter, enter 't', if from computer, enter 'c', if handwritten, enter 'h', if other, enter 'o': " + read -e DOCTYPE + [ -z "${DOCTYPE}" ] && _ask_doctype + [[ "${DOCTYPE}" = "q" ]] && exit 0 + fi +} trap _cleanup SIGHUP SIGINT SIGTERM _log -b @@ -34,6 +43,7 @@ _ask_operator _ask_mediaid _ask_scantype +_ask_doctype _scan_page(){ COMMAND="fscanx "${MIDDLE_OPTIONS[@]}" --rgb --bits 8 --resolution 600 --auto-length --paper-width 10200 --paper-height 13200 --rotate-n-n --left 0 --width 10200 --top 0 --height 13200 --double-feed n --tiff --no-jpeg --images-per-file 1 --compress zlib '${ORIGDIR}/${MEDIAID}_${COUNTER}.tif'" exec & >> "${LOGDIR}/fscanx_process.txt" @@ -105,6 +115,7 @@ echo "os: ${OS}" >> "${LOGDIR}/capture.log" echo "identifier: ${MEDIAID}" >> "${LOGDIR}/capture.log" echo "operator: ${OP}" >> "${LOGDIR}/capture.log" echo "scantype: ${SCANTYPE}" >> "${LOGDIR}/capture.log" +echo "doctype: ${DOCTYPE}" >> "${LOGDIR}/capture.log" echo "command: ${COMMAND}" >> "${LOGDIR}/capture.log" echo done scanning "${MEDIAID}" From 9d0941f47a0295426b9abf52f8a724298cdfed95 Mon Sep 17 00:00:00 2001 From: aditikp-07 Date: Wed, 12 Jul 2023 14:28:14 -0400 Subject: [PATCH 10/15] store doctype from user input --- paperingest | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/paperingest b/paperingest index 9675054..b6ef4bc 100755 --- a/paperingest +++ b/paperingest @@ -28,11 +28,23 @@ _ask_doubleside(){ } _ask_doctype(){ - if [ -z "${DOCTYPE}" ] ; then + if [ -z "${DOCTYPE_ANSWER}" ] ; then _report -qn "If document is from typewriter, enter 't', if from computer, enter 'c', if handwritten, enter 'h', if other, enter 'o': " - read -e DOCTYPE - [ -z "${DOCTYPE}" ] && _ask_doctype - [[ "${DOCTYPE}" = "q" ]] && exit 0 + read -e DOCTYPE_ANSWER + [ -z "${DOCTYPE_ANSWER}" ] && _ask_doctype + [[ "${DOCTYPE_ANSWER}" = "q" ]] && exit 0 + + if [[ "${DOCTYPE_ANSWER}" = "t" ]] ; then + DOCTYPE="typewriter" + elif [[ "${DOCTYPE_ANSWER}" = "c" ]] ; then + DOCTYPE="computer" + elif [[ "${DOCTYPE_ANSWER}" = "h" ]] ; then + DOCTYPE="handwritten" + elif [[ "${DOCTYPE_ANSWER}" = "o" ]] ; then + DOCTYPE="other" + else + _report -w "You said ${DOCTYPE_ANSWER} which is not valid." + fi fi } trap _cleanup SIGHUP SIGINT SIGTERM From c75d8e50d7020bab1d18078b0777c5ef9911a890 Mon Sep 17 00:00:00 2001 From: aditikp-07 Date: Wed, 12 Jul 2023 15:08:07 -0400 Subject: [PATCH 11/15] added branch for use of whitelist --- makepdf | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/makepdf b/makepdf index 787f312..4cf33c2 100755 --- a/makepdf +++ b/makepdf @@ -50,6 +50,7 @@ while [ "${*}" != "" ] ; do OUTPUTDIR="${OUTPUTDIR_FORCED}" LOGDIR="${OUTPUTDIR}/logs" fi + INGESTLOG="${LOGDIR}/capture.log" OUTPUTDIRTEXT="${INPUT}/objects/access/txt_1" _run mkdir -p "${LOGDIR}" exec > >(tee "${LOGDIR}/$(basename "${0}")_$(_get_iso8601_c)_$(basename "${0}")_${VERSION}.txt") @@ -75,7 +76,7 @@ while [ "${*}" != "" ] ; do TMP_JPG_DIR="${TMP_MAKEPDF_DIR}/jpgs" _run mkdir -p "${TMP_MAKEPDF_DIR}" "${TMP_JPG_DIR}" "${OUTPUTDIRTEXT}" - for TIF in $(find "${SOURCEDIR}" -maxdepth 1 -mindepth 1 -iname "*.tif" -type f | sort) ; do + for TIF in $(find "${SOURCEDIR}" -maxdepth 1 -mindepth 1 \( -iname "*.tif" -o -iname "*.tiff" \) -type f | sort) ; do tifname="$(basename "${TIF}")" _report -dt "Working on ${tifname}..." pageno="$(echo "${tifname}" | cut -d_ -f2 | cut -d. -f1)" @@ -95,20 +96,30 @@ while [ "${*}" != "" ] ; do if [[ ! -s "${JPG_NAME}" ]] ; then ffmpeg -hide_banner -nostdin -i "${TIF}" -pix_fmt yuvj420p -s 1275x1650 "${JPG_NAME}" fi - TESSERACT_CONFIG=(-c tessedit_char_whitelist="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^\&*(){}[]\|\"':;?/>.<,~\` " -c textord_min_linesize=2.25 -c preserve_interword_spaces=1) + + if [[ -f "$INGESTLOG" ]] ; then + DOCTYPE=$(_readingestlog "doctype") + fi + + if [[ "${DOCTYPE}" == "t" ]] ; then + TESSERACT_CONFIG=(-c tessedit_char_whitelist="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^\&*(){}[]\|\"':;?/>.<,~\` " -c textord_min_linesize=2.25 -c preserve_interword_spaces=1) + _report -dt "ATTENTION: Character whitelist will be used for reading." + elif [[ "${DOCTYPE}" == "c" ]] ; then + TESSERACT_CONFIG=(-c textord_min_linesize=2.25 -c preserve_interword_spaces=1) + fi tesseract "${JPG_NAME}" "${TMP_JPG_DIR}/${TIF_BASE_NAME}" -l eng --psm 4 "${TESSERACT_CONFIG[@]}" pdf tesseract "${JPG_NAME}" "${TMP_JPG_DIR}/${TIF_BASE_NAME}" -l eng --psm 4 "${TESSERACT_CONFIG[@]}" txt done - _report -dt "Checking for PBCore data" - SCRIPT_TITLE=$(fmpbcore "${MEDIAID}" | xmlstarlet 'select' -N "p=http://www.pbcore.org/PBCore/PBCoreNamespace.html" -t -v "/p:pbcoreCollection/p:pbcoreDescriptionDocument/p:pbcoreTitle[@titleType='Series']" -o ": " -v "/p:pbcoreCollection/p:pbcoreDescriptionDocument/p:pbcoreTitle[@titleType='Episode']") - if [[ -n "${SCRIPT_TITLE}" ]] ; then - MIDDLE_OPTIONS+=(--pdftitle "${SCRIPT_TITLE}") - fi - SCRIPT_AUTHOR=$(fmpbcore "${MEDIAID}" | xmlstarlet 'select' -N "p=http://www.pbcore.org/PBCore/PBCoreNamespace.html" -t -m "/p:pbcoreCollection/p:pbcoreDescriptionDocument/p:pbcoreCreator" -v "p:creatorRole" -o ": " -v "p:creator" -o " ; ") - if [[ -n "${SCRIPT_AUTHOR}" ]] ; then - MIDDLE_OPTIONS+=(--pdfauthor "${SCRIPT_AUTHOR}") - fi + #_report -dt "Checking for PBCore data" + #SCRIPT_TITLE=$(fmpbcore "${MEDIAID}" | xmlstarlet 'select' -N "p=http://www.pbcore.org/PBCore/PBCoreNamespace.html" -t -v "/p:pbcoreCollection/p:pbcoreDescriptionDocument/p:pbcoreTitle[@titleType='Series']" -o ": " -v "/p:pbcoreCollection/p:pbcoreDescriptionDocument/p:pbcoreTitle[@titleType='Episode']") + #if [[ -n "${SCRIPT_TITLE}" ]] ; then + # MIDDLE_OPTIONS+=(--pdftitle "${SCRIPT_TITLE}") + #fi + #SCRIPT_AUTHOR=$(fmpbcore "${MEDIAID}" | xmlstarlet 'select' -N "p=http://www.pbcore.org/PBCore/PBCoreNamespace.html" -t -m "/p:pbcoreCollection/p:pbcoreDescriptionDocument/p:pbcoreCreator" -v "p:creatorRole" -o ": " -v "p:creator" -o " ; ") + #if [[ -n "${SCRIPT_AUTHOR}" ]] ; then + # MIDDLE_OPTIONS+=(--pdfauthor "${SCRIPT_AUTHOR}") + #fi pdfjoin "${MIDDLE_OPTIONS[@]}" --pdfkeywords "${MEDIAID}" --fitpaper 'false' --rotateoversize 'false' --paper letter $(find "${TMP_JPG_DIR}" -name "*.pdf" | sort | xargs) --outfile "${OUTPUT}" From 8d1717896f0d1c9526b3ebf571cd6b55d6c3b545 Mon Sep 17 00:00:00 2001 From: aditikp-07 Date: Thu, 13 Jul 2023 13:30:55 -0400 Subject: [PATCH 12/15] updated scan_page and scantype --- paperingest | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/paperingest b/paperingest index b6ef4bc..ec64ab4 100755 --- a/paperingest +++ b/paperingest @@ -56,10 +56,13 @@ _ask_mediaid _ask_scantype _ask_doctype + +MIDDLE_OPTIONS+=(--rgb --bits 8 --resolution 600 --auto-length --paper-width 10200 --paper-height 13200 --rotate-n-n --left 0 --width 10200 --top 0 --height 13200 --double-feed n --tiff --no-jpeg --images-per-file 1 --compress zlib) + _scan_page(){ - COMMAND="fscanx "${MIDDLE_OPTIONS[@]}" --rgb --bits 8 --resolution 600 --auto-length --paper-width 10200 --paper-height 13200 --rotate-n-n --left 0 --width 10200 --top 0 --height 13200 --double-feed n --tiff --no-jpeg --images-per-file 1 --compress zlib '${ORIGDIR}/${MEDIAID}_${COUNTER}.tif'" - exec & >> "${LOGDIR}/fscanx_process.txt" - eval "${COMMAND}" + echo "Running: fscanx ${MIDDLE_OPTIONS[@]} ${ORIGDIR}/${MEDIAID}_${COUNTER}.tif" >> "${LOGDIR}/fscanx_process.txt" + fscanx "${MIDDLE_OPTIONS[@]}" "${ORIGDIR}/${MEDIAID}_${COUNTER}.tif" | tee -a "${LOGDIR}/fscanx_process.txt" + } if [ -d "${OUTDIR_PAPER}/${MEDIAID}" ] ; then _report -wdt "It looks like this ${MEDIAID} was already scanned. If you want to overwrite the existing one please delete ${MEDIAID} first and then try again." @@ -82,6 +85,7 @@ START=$(date -u "+%Y%m%dT%H%M%SZ") if [[ "${SCANTYPE}" == "a" ]] ; then MIDDLE_OPTIONS+=(--adf) + SCANTYPE_ANSWER="ADF" _ask_doubleside if [[ "${DOUBLESIDE}" == 2 ]] ; then MIDDLE_OPTIONS+=(--duplex) @@ -93,6 +97,7 @@ if [[ "${SCANTYPE}" == "a" ]] ; then _scan_page elif [[ "${SCANTYPE}" == "f" ]] ; then MIDDLE_OPTIONS+=(--flatbed) + SCANTYPE_ANSWER="flatbed" COUNTER=1 _report -d -n "Hit enter to scan a page or q to stop scanning pages: " read PAGE_ANSWER @@ -126,9 +131,9 @@ echo "model id: ${MODEL}" >> "${LOGDIR}/capture.log" echo "os: ${OS}" >> "${LOGDIR}/capture.log" echo "identifier: ${MEDIAID}" >> "${LOGDIR}/capture.log" echo "operator: ${OP}" >> "${LOGDIR}/capture.log" -echo "scantype: ${SCANTYPE}" >> "${LOGDIR}/capture.log" +echo "scantype: ${SCANTYPE_ANSWER}" >> "${LOGDIR}/capture.log" echo "doctype: ${DOCTYPE}" >> "${LOGDIR}/capture.log" -echo "command: ${COMMAND}" >> "${LOGDIR}/capture.log" +echo "fscanx_options: ${MIDDLE_OPTIONS[@]}" >> "${LOGDIR}/capture.log" echo done scanning "${MEDIAID}" From 14aa017ac2cf9870bfafb4b6a4bf3de1487ade8d Mon Sep 17 00:00:00 2001 From: aditikp-07 Date: Thu, 13 Jul 2023 13:31:48 -0400 Subject: [PATCH 13/15] created separate file rename functions for adf and flatbed --- paperingest | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/paperingest b/paperingest index ec64ab4..aa68a35 100755 --- a/paperingest +++ b/paperingest @@ -77,9 +77,14 @@ LOGDIR="${OUTDIR_PAPER}/${MEDIAID}/metadata/logs" mkdir -p "${ORIGDIR}" mkdir -p "${LOGDIR}" -_file_rename(){ +_file_rename_flatbed(){ for file in ${ORIGDIR}/*1.tif ; do mv -v -n "${file}" "${file//1.tif/.tiff}" ; done +} + +_file_rename_adf(){ + for file in ${ORIGDIR}/*.tif ; do mv -v -n "${file}" "${file//.tif/.tiff}" ; done + } START=$(date -u "+%Y%m%dT%H%M%SZ") @@ -95,6 +100,7 @@ if [[ "${SCANTYPE}" == "a" ]] ; then _report -w "You said ${DOUBLESIDE} for the number of pages which is not valid, use 1 or 2." fi _scan_page + _file_rename_adf elif [[ "${SCANTYPE}" == "f" ]] ; then MIDDLE_OPTIONS+=(--flatbed) SCANTYPE_ANSWER="flatbed" @@ -104,7 +110,7 @@ elif [[ "${SCANTYPE}" == "f" ]] ; then while [[ ! "${PAGE_ANSWER}" = "q" ]] ; do _scan_page - _file_rename + _file_rename_flatbed ((COUNTER++)) _report -d -n "Hit enter to scan a page or q to stop scanning pages (next page is ${COUNTER}): " read PAGE_ANSWER From 80f6145c52355a53bdb55291b4fc1eb1312e0dee Mon Sep 17 00:00:00 2001 From: aditikp-07 Date: Wed, 23 Aug 2023 14:42:46 -0400 Subject: [PATCH 14/15] Update paperingest --- paperingest | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/paperingest b/paperingest index aa68a35..108cb36 100755 --- a/paperingest +++ b/paperingest @@ -62,6 +62,13 @@ MIDDLE_OPTIONS+=(--rgb --bits 8 --resolution 600 --auto-length --paper-width 102 _scan_page(){ echo "Running: fscanx ${MIDDLE_OPTIONS[@]} ${ORIGDIR}/${MEDIAID}_${COUNTER}.tif" >> "${LOGDIR}/fscanx_process.txt" fscanx "${MIDDLE_OPTIONS[@]}" "${ORIGDIR}/${MEDIAID}_${COUNTER}.tif" | tee -a "${LOGDIR}/fscanx_process.txt" +======= +COMMAND_OPTIONS+=(--rgb --bits 8 --resolution 600 --auto-length --paper-width 10200 --paper-height 13200 --rotate-n-n --left 0 --width 10200 --top 0 --height 13200 --double-feed n --tiff --no-jpeg --images-per-file 1 --compress zlib) + +_scan_page(){ + echo "Running: fscanx ${MIDDLE_OPTIONS[@]} ${COMMAND_OPTIONS[@]} ${ORIGDIR}/${MEDIAID}_scan${COUNTER}_${SCANTYPE_ANSWER}_.tif" >> "${LOGDIR}/fscanx_process.txt" + fscanx "${MIDDLE_OPTIONS[@]}" "${COMMAND_OPTIONS[@]}" "${ORIGDIR}/${MEDIAID}_scan${COUNTER}_${SCANTYPE_ANSWER}_.tif" | tee -a "${LOGDIR}/fscanx_process.txt" +>>>>>>> Stashed changes } if [ -d "${OUTDIR_PAPER}/${MEDIAID}" ] ; then @@ -79,6 +86,7 @@ mkdir -p "${LOGDIR}" _file_rename_flatbed(){ for file in ${ORIGDIR}/*1.tif ; do mv -v -n "${file}" "${file//1.tif/.tiff}" ; done + for file in ${ORIGDIR}/*1.tif ; do mv -v -n "${file}" "${file//1.tif/${COUNTER}.tiff}" ; done } @@ -91,16 +99,23 @@ START=$(date -u "+%Y%m%dT%H%M%SZ") if [[ "${SCANTYPE}" == "a" ]] ; then MIDDLE_OPTIONS+=(--adf) SCANTYPE_ANSWER="ADF" +_scan_page_adf() { + MIDDLE_OPTIONS=(--adf) + SCANTYPE_ANSWER="ADF" + _ask_doctype _ask_doubleside if [[ "${DOUBLESIDE}" == 2 ]] ; then MIDDLE_OPTIONS+=(--duplex) elif [[ "${DOUBLESIDE}" == 1 ]] ; then break + : +>>>>>>> Stashed changes else _report -w "You said ${DOUBLESIDE} for the number of pages which is not valid, use 1 or 2." fi _scan_page _file_rename_adf +<<<<<<< Updated upstream elif [[ "${SCANTYPE}" == "f" ]] ; then MIDDLE_OPTIONS+=(--flatbed) SCANTYPE_ANSWER="flatbed" @@ -120,6 +135,66 @@ else _report -w "You said ${SCANTYPE} for the scantype which is not valid, use 'a' or 'f'." exit 1 fi +======= + DOUBLESIDE="" + DOCTYPE_ANSWER="" +} + +_scan_page_flatbed(){ + MIDDLE_OPTIONS=(--flatbed) + SCANTYPE_ANSWER="flatbed" + #((COUNTER++)) + _report -d -n "Hit enter to scan a page or q to stop scanning pages: " + read PAGE_ANSWER + + while [[ "${PAGE_ANSWER}" != "q" && "${PAGE_ANSWER}" != "a" ]] ; do + _ask_doctype + _scan_page + _file_rename_flatbed + ((COUNTER++)) + _report -d -n "Hit enter to scan a page, a to change the scanner or q to stop scanning pages (next page is ${COUNTER}): " + read PAGE_ANSWER + if [[ "${PAGE_ANSWER}" == "a" ]] ; then + SCANTYPE="a" + echo "scantype is ${SCANTYPE} now." + fi + DOCTYPE_ANSWER="" + #echo "doctype is ${DOCTYPE_ANSWER}" + #_ask_doctype + done +} + + + +COUNTER=1 +if [[ "${SCANTYPE}" == "f" ]] ; then + _scan_page_flatbed +fi + +if [[ "${SCANTYPE}" == "a" ]] ; then + #((COUNTER++)) + while [[ "${PAGE_ANSWER}" != "q" ]] ; do + _scan_page_adf + #echo "duplex is ${DOUBLESIDE}" + ((COUNTER++)) + _report -d -n "Hit a to continue scanning, f to change the scanner or q to stop scanning pages: " + read PAGE_ANSWER + if [[ "${PAGE_ANSWER}" == "f" ]] ; then + SCANTYPE="f" + echo "scantype is ${SCANTYPE} now." + _scan_page_flatbed + fi + done +fi + +if [[ "${SCANTYPE}" != "a" && "${SCANTYPE}" != "f" && "${SCANTYPE}" != "q" ]] ; then + _report -w "You said ${SCANTYPE} for the scantype which is not valid, use 'a' or 'f'." + exit 1 +fi + + + +>>>>>>> Stashed changes FIRST=$(find "${ORIGDIR}" -type f -mindepth 1 -maxdepth 1 ! -name ".*" -exec ls -1rt '{}' \; | head -n 1) LAST=$(ls -1t "${ORIGDIR}" | head -n 1) @@ -140,6 +215,7 @@ echo "operator: ${OP}" >> "${LOGDIR}/capture.log" echo "scantype: ${SCANTYPE_ANSWER}" >> "${LOGDIR}/capture.log" echo "doctype: ${DOCTYPE}" >> "${LOGDIR}/capture.log" echo "fscanx_options: ${MIDDLE_OPTIONS[@]}" >> "${LOGDIR}/capture.log" +echo "fscanx_options: ${MIDDLE_OPTIONS[@]} ${COMMAND_OPTIONS[@]}" >> "${LOGDIR}/capture.log" echo done scanning "${MEDIAID}" From 23a1e9fba2251fae01481e0d73e5c95233f5e4b8 Mon Sep 17 00:00:00 2001 From: aditikp-07 Date: Wed, 23 Aug 2023 14:46:37 -0400 Subject: [PATCH 15/15] Create folderupgrade --- folderupgrade | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 folderupgrade diff --git a/folderupgrade b/folderupgrade new file mode 100644 index 0000000..e48106b --- /dev/null +++ b/folderupgrade @@ -0,0 +1,82 @@ +#!/bin/bash +#script to remove the submissionDocumentation folder +#calling mmfunctions +SCRIPTDIR=$(dirname $(which "${0}")) +. "${SCRIPTDIR}/mmfunctions" || { echo "Missing '${SCRIPTDIR}/mmfunctions'. Exiting." ; exit 1 ;}; +while [ "${*}" != "" ] ; do + echo "removing the submissionsDocumentation folder" + INPUTFILE="${1}" + echo "The input is ${INPUTFILE}" + SUBDOC="${INPUTFILE}/metadata/submissionDocumentation/" + METADOC="${INPUTFILE}/metadata/" + OBJECTDOC="${INPUTFILE}/objects/Preservation/" + shift + "${SCRIPTDIR}/removeDSStore" "${INPUTFILE}" + #remove unnecessary directory + if [ -d "${SUBDOC}" ] ; then + mv -v -n "${SUBDOC}"* "${METADOC}" + #mv "${SUBDOC}"* + echo "going to delete for realsies" + rmdir "${SUBDOC}" + fi + + for entry in "${OBJECTDOC}"* ; do + echo "here is file $entry" + if [ -d "$entry" ] ; then + cd "$entry" + mv -v -n * .[^.]* "${INPUTFILE}/objects/" + + echo "deleting unnecessary folders" + + rmdir "$entry" + + #moving images folder to metadata and renaming + mkdir -p ${METADOC}depictions/ ; mv -n "${INPUTFILE}/objects/Image"* "${METADOC}depictions/object_photos" + + #removing extra Image folder + if [ -d "${METADOC}depictions/object_photos/Image" ] ; then + cd "${METADOC}depictions/object_photos/Image" + mv -v -n * .[^.]* .. + echo "deleting extra Image folder" + rmdir "${METADOC}depictions/object_photos/Image" + fi + + + #removing Preservation folder + mv -n "${INPUTFILE}/objects/Preservation Master/"* "${INPUTFILE}/objects/" + echo "removing empty preservation folder" + rmdir "${INPUTFILE}/objects/Preservation Master" + rmdir "${OBJECTDOC}" + + #renaming restoration/access folder + if [ -d "${INPUTFILE}/objects/Restoration" ] ; then + mv -v "${INPUTFILE}/objects/Restoration" "${INPUTFILE}/objects/restoration" + fi + + #renaming access folder + if [ -d "${INPUTFILE}/objects/Access/" ] ; then + echo "moving to restoration folder" + mkdir -p ${INPUTFILE}/objects/restoration/ + echo "created restoration folder" + for object in "${INPUTFILE}/objects/Access/" ; do + echo "here is $object" + cd $object + mv -n * .[^.]* "${INPUTFILE}/objects/restoration/" + done + rmdir "${INPUTFILE}/objects/Access" + fi + fi + done +done +#checksum verification +cd ${INPUTFILE}/objects && +for file in *; do + if [[ -f "$file" ]] ; then + echo "file is $file.md5" + NEWMD5="$(md5 -q "$file")" + OLDMD5="$(cat ${METADOC}${file}.md5)" + fi +done +if [[ "${NEWMD5}" = "${OLDMD5}" ]] ; then + echo "checksums match" +fi