Skip to content
Snippets Groups Projects
Commit 1201432d authored by Ossi Laine's avatar Ossi Laine
Browse files

Fix bug and update README

parent dbc0adf1
No related branches found
No related tags found
No related merge requests found
......@@ -12,5 +12,8 @@ restart:
docker-compose -f docker/docker-compose.yml down -v
docker-compose -f docker/docker-compose.yml up -d
kill:
docker-compose -f docker/docker-compose.yml down -v
logs:
docker-compose -f docker/docker-compose.yml logs
# file-parser
EXtract text from documents or images
\ No newline at end of file
Extract text from documents or images
### run locally:
```
sudo make deploy
```
### test file parsing:
```
curl -F "file=@/home/user/Documents/test.pdf" localhost:5000/upload
```
# coding: utf-8
import logging
import os
import mimetypes
import magic
from flask import Flask, render_template, jsonify, request, flash
from flask import Flask, jsonify, request
from parser import convert_document
from error import InvalidUsage
from utils import error_template
app = Flask(__name__)
@app.before_first_request
def setup_logging():
if not app.debug:
......@@ -19,6 +16,7 @@ def setup_logging():
app.logger.addHandler(logging.StreamHandler())
app.logger.setLevel(logging.INFO)
# Using Celery for long running queries
# from celery import Celery
# app.config['CELERY_BROKER_URL'] = 'redis://redis:6379/0'
......@@ -27,6 +25,7 @@ def setup_logging():
# celery = Celery(app.name, broker=app.config['CELERY_BROKER_URL'])
# celery.conf.update(app.config)
def get_mime(uploaded_file):
"""1. get mime from Flask FileStorage object
2. get mime from suffix
......@@ -37,12 +36,14 @@ def get_mime(uploaded_file):
content_type = uploaded_file.content_type
if content_type in [None, 'application/octet-stream']:
if content_type in [None, "application/octet-stream"]:
content_type = mimetypes.guess_type(uploaded_file.filename)[0] or None
if not content_type:
content_type = magic.from_buffer(uploaded_file.read(), mime=True) or \
'application/octet-stream'
content_type = (
magic.from_buffer(uploaded_file.read(), mime=True)
or "application/octet-stream"
)
return content_type
......@@ -58,16 +59,15 @@ def hello():
return jsonify(rv)
@app.route('/upload', methods=['POST'])
@app.route("/upload", methods=["POST"])
def upload_file():
if request.method == 'POST':
if 'file' not in request.files:
if request.method == "POST":
if "file" not in request.files:
return jsonify({"results": "no file"})
uploaded_file = request.files['file']
uploaded_file = request.files["file"]
force_ocr = request.form.get('ocr')
force_ocr = request.form.get("ocr")
if force_ocr:
force_ocr = int(force_ocr) if force_ocr.isdigit() else None
......@@ -83,11 +83,8 @@ def upload_file():
# strip whitespace from content
content = content.strip() if isinstance(content, str) else content
return jsonify({
"result": content,
"meta": meta
})
return jsonify({"result": content, "meta": meta})
if __name__ == "__main__":
    # Script entry point: start Flask's built-in server exactly once,
    # with debug mode off and bound to all network interfaces.
    app.run(debug=False, host="0.0.0.0")
def error_template():
    """Return a fresh skeleton for a JSON error response.

    The result has the keys ``error`` and ``detail`` (both ``None``) and a
    nested ``meta`` dict whose ``status_code`` defaults to 400. A new dict,
    including a new ``meta`` dict, is built on every call, so a caller may
    fill in the fields without affecting templates returned by other calls.
    """
    return {"error": None, "detail": None, "meta": {"status_code": 400}}
class InvalidUsage(Exception):
class InvalidUsage(Exception):
status_code = 400
def __init__(self, message, status_code=None, payload=None):
......@@ -22,11 +14,10 @@ class InvalidUsage(Exception):
self.status_code = status_code
try:
self.payload = payload.decode('utf-8')
self.payload = payload.decode("utf-8")
except (UnicodeDecodeError, AttributeError):
self.payload = None
def to_dict(self):
template = error_template()
template["error"] = self.message
......@@ -34,4 +25,3 @@ class InvalidUsage(Exception):
template["meta"]["status_code"] = self.status_code
return template
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment