Skip to content
This repository has been archived by the owner on Aug 26, 2021. It is now read-only.

Commit

Permalink
Release v2.1.8 Sponsored by IFIC
Browse files Browse the repository at this point in the history
  • Loading branch information
sochix committed May 16, 2018
1 parent 15b503f commit ea6dcff
Show file tree
Hide file tree
Showing 39 changed files with 791 additions and 388 deletions.
14 changes: 14 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
Change log
==========
2.1.8 (2018-05-16)
-------------------

This Release is sponsored by [IFIC.co.uk](http://www.ific.co.uk/), special thanks to Dr. Barry Clark

### What's new:

- Dramatic increase of crawling speed
- Storage consumption is minimal: files are now downloaded directly from the crawled file system
- File removal sync: if a file is removed from a folder, it is marked as removed in Ambar
- Added ability to ignore files by folders, extensions and file names
- Bug fixes and small changes to docker-compose.yml

Update notes: before update, please download the latest [docker-compose.yml](https://github.com/RD17/ambar/blob/master/docker-compose.yml) and [read the installation instruction](https://github.com/RD17/ambar/blob/master/Install.md).

2.0.0rc (2018-04-18)
-------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,7 @@ const HintCard = (props) => {
<span className={classes.clickableSpan} onTouchTap={() => { performSearchByQuery('tags:ocr,ui-upload') }}>
tags:ocr,ui-upload
</span> - {localization.searchPage.tagsQueryLabel}
</li>
<li>
<span className={classes.clickableSpan} onTouchTap={() => { performSearchByQuery('entities:"[email protected]"') }}>
entities:"[email protected]"
</span> - {localization.searchPage.entitiesQueryLabel}
</li>
</li>
<li>
<span className={classes.clickableSpan} onTouchTap={() => { performSearchByQuery('show:removed') }}>
show:removed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class DetailedView extends Component {
key={hit.file_id}
hit={hit}
thumbnailUri={urls.ambarWebApiGetThumbnail(hit.sha256)}
downloadUri={urls.ambarWebApiGetFile(hit.meta.download_uri)}
downloadUri={urls.ambarWebApiGetFile(hit.meta.full_name)}
{...this.props}
/>
)}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,14 +101,14 @@ class DetailedCard extends Component {
</div>}
<CardActions className={classes.searchResultRowCardFooter}>
<div style={{ display: 'flex', justifyContent: !isHidden ? 'space-between' : 'flex-end', width: '100%' }}>
{!isHidden && <div>
{preserveOriginals && <FlatButton
{!isHidden && !hidden_mark && meta.source_id != 'ui-upload' && !meta.extra.some(item => item.key === 'from_container') && <div>
<FlatButton
icon={<FileDownloadIcon />}
label={localization.searchPage.downloadLabel}
title={localization.searchPage.downloadDescriptionLabel}
primary={true}
onTouchTap={() => { window.open(downloadUri) }}
/>}
/>
</div>}
<div>
{!hidden_mark && <FlatButton
Expand All @@ -118,14 +118,7 @@ class DetailedCard extends Component {
title={localization.searchPage.removeDescriptionLabel}
style={{ color: 'grey' }}
onTouchTap={() => hideFile(fileId)}
/>}
{(isHidden || hidden_mark) && <FlatButton
icon={<UndoIcon />}
label={localization.searchPage.restoreLabel}
title={localization.searchPage.restoreDescriptionLabel}
primary={true}
onTouchTap={() => showFile(fileId)}
/>}
/>}
</div>
</div>}
</CardActions>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class TableView extends Component {
key={hit.file_id}
hit={hit}
thumbnailUri={urls.ambarWebApiGetThumbnail(hit.sha256)}
downloadUri={urls.ambarWebApiGetFile(hit.meta.download_uri)}
downloadUri={urls.ambarWebApiGetFile(hit.meta.full_name)}
{...this.props}
/>
)}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,10 @@ class TableRowResult extends Component {
<UpdatedDateTimeLabel meta={meta} searchQuery={searchQuery} formatFunc={getFormattedTime} />
</TableRowColumn>
<TableRowColumn style={{ width: '220px' }}>
{preserveOriginals && <IconButton onTouchTap={() => { window.open(downloadUri) }}
{!hidden_mark && meta.source_id != 'ui-upload' && !meta.extra.some(item => item.key === 'from_container') && <IconButton onTouchTap={() => { window.open(downloadUri) }}
title={localization.searchPage.downloadDescriptionLabel}>
<FileDownloadIcon color='#00bcd4' hoverColor='#80deea' />
</IconButton>}
</IconButton>}
<IconButton
disabled={!(contentHighlight && content.thumb_available)}
onTouchTap={() => {
Expand All @@ -94,10 +94,7 @@ class TableRowResult extends Component {
</IconButton>
{!hidden_mark && <IconButton onTouchTap={() => hideFile(fileId)} title={localization.searchPage.removeLabel}>
<DeleteIcon color='#00bcd4' hoverColor='#80deea' />
</IconButton>}
{(isHidden || hidden_mark) && <IconButton onTouchTap={() => showFile(fileId)} title={localization.searchPage.restoreLabel}>
<UndoIcon color='#00bcd4' hoverColor='#80deea' />
</IconButton>}
</IconButton>}
</TableRowColumn>
</TableRow>
)
Expand Down
12 changes: 3 additions & 9 deletions FrontEnd/src/layouts/MainLayout/components/RateUs/RateUs.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,24 +35,18 @@ class RateUs extends Component {
</div>
<div className={classes.rateUsText}>
<p>
Let's spread the word that Ambar is awesome! Help us make Ambar even better, follow us on Twitter or give us your stars on Github.
Let's spread the word that Ambar is awesome! Help us make Ambar even better, give us your stars on Github.
</p>
<p>
Together we will build the best document search system in the world!
Together we will build the best document search engine in the world!
</p>
<div style={{display: 'flex', justifyContent: 'center'}}>
<FlatButton
label="Star"
primary={true}
onTouchTap={() => goToUrl('https://github.com/RD17/ambar')}
icon={<img height={20} src={GithubIcon} />}
/>
<FlatButton
label="Tweet"
primary={true}
onTouchTap={() => goToUrl('https://twitter.com/intent/tweet?text=%23Ambar%20is%20awesome%20%23DocumentSearchSystem!%20Check%20it%20out%20on%20https%3A%2F%2Fambar.cloud')}
icon={<img height={20} src={TwitterIcon} />}
/>
/>
</div>
</div>
</div>
Expand Down
2 changes: 1 addition & 1 deletion FrontEnd/src/utils/urls.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ const init = (apiHost) => {
ambarWebApiSearchByStringQuery: (query, page, size) => `${apiHost}/api/search?query=${encodeURIComponent(query)}&page=${page}&size=${size}`,
ambarWebApiLoadContentHightlight: (fileId, query) => `${apiHost}/api/search/${fileId}/?query=${encodeURIComponent(query)}`,
ambarWebApiLoadFullContentHightlight: (fileId, query) => `${apiHost}/api/search/${fileId}/full?query=${encodeURIComponent(query)}`,
ambarWebApiGetFile: (metaId) => `${apiHost}/api/files/${metaId}`,
ambarWebApiGetFile: (fullPath) => `${apiHost}/api/files/download?path=${encodeURIComponent(fullPath)}`,
ambarWebApiGetFileText: (metaId) => `${apiHost}/api/files/${metaId}/text`,

ambarWebApiGetCrawlers: () => `${apiHost}/api/crawlers`,
Expand Down
41 changes: 35 additions & 6 deletions Install.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# Install docker and docker-compose

To install and configure Ambar you need expertise in Unix, Docker and Docker Compose.
If you have any difficulties installing and running Ambar you can request a dedicated support session by mailing us on [email protected]
If you have any difficulties installing and running Ambar you can request a dedicated support session by mailing us on [[email protected]](mailto:[email protected])

Please refer to official [Docker](https://docs.docker.com/install/) and [Docker Compose](https://docs.docker.com/compose/install/) installation instructions.
Please refer to official [Docker](https://docs.docker.com/install/) and [Docker Compose](https://docs.docker.com/compose/install/) installation instructions.

To check if everything is installed correctly please run:

Expand Down Expand Up @@ -44,11 +44,40 @@ Then modify it:

## Set up your crawlers

- Find ```crawler0``` block - this is a template for your new crawler
- Replace ```${crawlerName}``` with desired name for your crawler (only lowercase latin letters and dashes are supported)
- Find ```${crawlerName}``` block - this is a template for your new crawler
- Replace ```${crawlerName}``` with desired name for your crawler (only lowercase latin letters and dashes are supported). Check that service block name and crawler name are the same
- Replace ```${pathToCrawl}``` with path to a local folder to be crawled, if you want to crawl SMB or FTP - just mount it with standard unix tools

You can add additional crawlers by copying ```crawler0``` segment and editing its settings (don't forget to edit the service name, e.g. to ```crawler1```).
### Optional settings
- `ignoreFolders` - ignore folders by [glob pattern](https://github.com/isaacs/node-glob#glob-primer)
- `ignoreExtensions` - ignore file extensions by [glob pattern](https://github.com/isaacs/node-glob#glob-primer) (default: .{exe,dll})
- `ignoreFileNames` - ignore file names by [glob pattern](https://github.com/isaacs/node-glob#glob-primer) (default: ~*)
- `maxFileSize` - max file size (default: 300mb)

### Crawler configuration example:
```
Docs:
depends_on:
serviceapi:
condition: service_healthy
image: ambar/ambar-local-crawler
restart: always
networks:
- internal_network
expose:
- "8082"
environment:
- name=Docs
- ignoreFolders=**/ForSharing/**
- ignoreExtensions=.{exe,dll,rar}
- ignoreFileNames=*backup*
- maxFileSize=15mb
volumes:
- /media/Docs:/usr/data
```


You can add more crawlers by copying ```${crawlerName}``` segment and editing its settings (don't forget to edit the service name).

# Start Ambar

Expand All @@ -58,4 +87,4 @@ To start Ambar run ```docker-compose up -d```.

Ambar UI will be accessible on ```http://${ambarHostIpAddress}/```

If you have any difficulties installing and running Ambar you can request a dedicated support session by mailing us on [email protected]
If you have any difficulties installing and running Ambar you can request a dedicated support session by mailing us on [[email protected]](mailto:[email protected])
2 changes: 1 addition & 1 deletion LocalCrawler/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ COPY . .
RUN yarn install
RUN yarn run build

CMD node dist
CMD node --max-old-space-size=8096 dist

HEALTHCHECK --interval=5s --timeout=30s --retries=50 \
CMD curl -f localhost:8082/api/ || exit 1
2 changes: 2 additions & 0 deletions LocalCrawler/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
"author": "RD17",
"license": "MIT",
"dependencies": {
"amqplib": "^0.5.2",
"babel-eslint": "^7.1.0",
"babel-polyfill": "^6.26.0",
"body-parser": "^1.13.3",
Expand All @@ -32,6 +33,7 @@
"eslint-plugin-promise": "^3.3.0",
"express": "^4.13.3",
"idempotent-babel-polyfill": "^0.1.1",
"minimatch": "^3.0.4",
"moment": "^2.15.0",
"request": "^2.85.0",
"request-promise-native": "^1.0.5",
Expand Down
43 changes: 41 additions & 2 deletions LocalCrawler/src/api/index.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import { version, name, description } from '../../package.json'
import { Router } from 'express'
// import config from '../config'
import fs from 'fs'
import path from 'path'
import config from '../config'

import * as ApiProxy from '../services/ApiProxy'

export default () => {
let api = Router()
Expand All @@ -11,7 +15,42 @@ export default () => {
version: version,
description: description
})
})
})

/**
 * GET /download?path=<path relative to the crawl root>
 *
 * Sends the requested file from the crawled folder as an attachment.
 *
 * Responses:
 *   400 - `path` query parameter is missing or is not a single string
 *   403 - `path` resolves to a location outside of `config.crawlPath`
 *   404 - the resolved path does not exist
 *   500 - unexpected error while resolving the path or sending the file
 */
api.get('/download', (req, res) => {
    const filePath = req.query.path

    // A repeated or nested query parameter (?path=a&path=b, ?path[x]=a) is
    // parsed into an array/object, which is not a usable path.
    if (!filePath || typeof filePath !== 'string') {
        res.sendStatus(400)
        return
    }

    let absolutePath = null
    let doesFileExist = false

    try {
        const rootPath = path.resolve(config.crawlPath)
        absolutePath = path.join(rootPath, filePath)

        // `path` comes straight from the request, so '..' segments could point
        // outside of the crawled folder. After normalization the target must
        // still be located under the crawl root, otherwise the request is refused.
        // (Symbolic links inside the crawl root are not resolved here.)
        const relativePath = path.relative(rootPath, absolutePath)
        const isOutsideRoot = relativePath === '..'
            || relativePath.startsWith(`..${path.sep}`)
            || path.isAbsolute(relativePath)

        if (isOutsideRoot) {
            ApiProxy.logData(config.name, 'error', `[${filePath}] Error: path is outside of the crawl folder`)
            res.sendStatus(403)
            return
        }

        doesFileExist = fs.existsSync(absolutePath)
    } catch (error) {
        ApiProxy.logData(config.name, 'error', `Error: ${error}`)
        res.status(500).json({ error: error })
        return
    }

    if (!doesFileExist) {
        res.sendStatus(404)
        return
    }

    res.download(absolutePath, (error) => {
        if (error) {
            // The transfer may fail midway; a status can only be set while
            // the headers have not been sent yet.
            if (!res.headersSent) {
                res.status(500).json({ error: error })
            }
            ApiProxy.logData(config.name, 'error', `[${absolutePath}] Error: ${error}`)
        }
    })
})

return api
}
13 changes: 8 additions & 5 deletions LocalCrawler/src/config.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
const defaultConfig = {
"port": 8082,
"bodyLimit": "10mb",
"crawlPath": "C:\\Dropbox\\Development\\Git\\Ambar\\LocalCrawler\\node_modules",
"apiUrl": "http://ambar:8081",
"allowedFilesRegex": '(\\.doc[a-z]*$)|(\\.xls[a-z]*$)|(\\.txt$)|(\\.pst$)|(\\.csv$)|(\\.htm[a-z]*$)|(\\.ppt[a-z]*$)|(\\.pdf$)|(\\.msg$)|(\\.zip$)|(\\.eml$)|(\\.rtf$)|(\\.md$)|(\\.png$)|(\\.bmp$)|(\\.tif[f]*$)|(\\.jp[e]*g$)',
"name": "nodemodules-crawler",
"maxFileSize": "30mb"
"crawlPath": "/usr/data",
"apiUrl": "http://serviceapi:8080",
"ignoreFolders": "**/test/**",
"ignoreExtensions": ".{exe,dll}",
"ignoreFileNames": "~*",
"name": "localhost",
"maxFileSize": "300mb",
"rabbitHost": "amqp://rabbit"
}

let config = null
Expand Down
58 changes: 37 additions & 21 deletions LocalCrawler/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,47 @@ import cors from 'cors'
import bodyParser from 'body-parser'
import api from './api'
import config from './config'
import cluster from 'cluster'

import 'babel-core/register'
import 'idempotent-babel-polyfill'

import { FileWatchService, ApiProxy } from './services'

ApiProxy.logData(config.name, 'info', 'Crawler initialized')
FileWatchService.startWatch()
// Entry point: the master process serves the HTTP API, while a single forked
// worker process runs the file watcher. `app` is only assigned in the master,
// so the default export is null inside the worker.
let app = null
if (cluster.isMaster) {
ApiProxy.logData(config.name, 'info', 'API runs on master thread')
ApiProxy.logData(config.name, 'info', 'Creating fork for the file-watcher process')
// Spawns the worker; the worker re-runs this module and takes the else branch below.
// NOTE(review): no 'exit' listener is registered in this block, so it looks like
// the API keeps running if the worker exits - confirm this is intended.
cluster.fork()

app = express()
app.server = http.createServer(app)

app.use(cors({
credentials: true,
origin: true
}))

// JSON request bodies are limited to config.bodyLimit
app.use(bodyParser.json({
limit: config.bodyLimit
}))

// api router
app.use('/api', api())
// The PORT environment variable takes precedence over the configured port
app.server.listen(process.env.PORT || config.port)

console.log(`Started API on ${app.server.address().address}:${app.server.address().port}`)


} else {
ApiProxy.logData(config.name, 'info', 'File-watcher runs on worker thread')

// If the watcher fails, log the error and terminate the worker with a non-zero exit code
FileWatchService.startWatch()
.catch(err => {
ApiProxy.logData(config.name, 'error', `Error: ${err}`)
process.exit(1)
})
}

export default app

let app = express()
app.server = http.createServer(app)

app.use(cors({
credentials: true,
origin: true
}))

app.use(bodyParser.json({
limit: config.bodyLimit
}))

// api router
app.use('/api', api())
app.server.listen(process.env.PORT || config.port)

console.log(`Started on ${app.server.address().address}:${app.server.address().port}`)

export default app
Loading

0 comments on commit ea6dcff

Please sign in to comment.