Skip to content

Commit

Permalink
feat(server): support multi languages on TCP server
Browse files: browse the repository at this point in the history
  • Loading branch information
louistiti committed Mar 5, 2022
1 parent cbe89ed commit a808742
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 24 deletions.
2 changes: 1 addition & 1 deletion bridges/python/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# -*- coding:utf-8 -*-

import utils
from sys import argv, path
from sys import path
from json import dumps, loads
from importlib import import_module

Expand Down
58 changes: 39 additions & 19 deletions bridges/python/tcp-server.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,39 @@
import select
import os
import json
from sys import argv
from os.path import join, dirname
from dotenv import load_dotenv
import spacy

dotenv_path = join(dirname(__file__), '../../.env')
load_dotenv(dotenv_path)

nlp = spacy.load('en_core_web_trf', disable=['tagger', 'parser', 'attribute_ruler', 'lemmatizer'])

entity_mapping = {
'PERSON': 'person',
'GPE': 'location',
'ORG': 'organization'
lang = argv[1] or 'en'

model_mapping = {
'en': {
'model': 'en_core_web_trf',
'disable': ['tagger', 'parser', 'attribute_ruler', 'lemmatizer'],
'entity_mapping': {
'PERSON': 'person',
'GPE': 'location',
'ORG': 'organization'
}
},
'fr': {
'model': 'fr_core_news_md',
'disable': ['tok2vec', 'morphologizer', 'parser', 'senter', 'attribute_ruler', 'lemmatizer'],
'entity_mapping': {
'PER': 'person',
'LOC': 'location',
'ORG': 'organization'
}
}
}

nlp = spacy.load(model_mapping[lang]['model'], disable=model_mapping[lang]['disable'])

ws_server_host = os.environ.get('LEON_PY_WS_SERVER_HOST', '0.0.0.0')
ws_server_port = os.environ.get('LEON_PY_WS_SERVER_PORT', 1342)

Expand All @@ -33,18 +51,19 @@ def extract_spacy_entities(utterance):
entities = []

for ent in doc.ents:
entity = entity_mapping[ent.label_]
entities.append({
'start': ent.start_char,
'end': ent.end_char,
'len': len(ent.text),
'sourceText': ent.text,
'utteranceText': ent.text,
'entity': entity,
'resolution': {
'value': ent.text
}
})
if ent.label_ in model_mapping[lang]['entity_mapping']:
entity = model_mapping[lang]['entity_mapping'][ent.label_]
entities.append({
'start': ent.start_char,
'end': ent.end_char,
'len': len(ent.text),
'sourceText': ent.text,
'utteranceText': ent.text,
'entity': entity,
'resolution': {
'value': ent.text
}
})

return entities

Expand All @@ -70,7 +89,8 @@ def extract_spacy_entities(utterance):
res = {
'topic': 'spacy-entities-received',
'data': {
'spacyEntities': entities
'spacyEntities': entities,
'lang': lang
}
}

Expand Down
5 changes: 2 additions & 3 deletions scripts/setup/setup-python-packages.js
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,8 @@ export default () => new Promise(async (resolve, reject) => {
log.info('Installing spaCy models...')
// Find new spaCy models: https://github.com/explosion/spacy-models/releases
await Promise.all([
command('pipenv run spacy download en_core_web_trf-3.2.0 --direct', { shell: true })
// command('pipenv run spacy download en_core_web_sm-3.1.0 --direct', { shell: true }),
// command('pipenv run spacy download fr_core_news_sm-3.1.0 --direct', { shell: true })
command('pipenv run spacy download en_core_web_trf-3.2.0 --direct', { shell: true }),
command('pipenv run spacy download fr_core_news_md-3.2.0 --direct', { shell: true })
])

log.success('spaCy models installed')
Expand Down
2 changes: 2 additions & 0 deletions server/src/helpers/lang.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,6 @@ lang.getLongCode = (shortLang) => {
return null
}

lang.getShortCode = (longLang) => langs[longLang].short

export default lang
3 changes: 2 additions & 1 deletion server/src/index.js
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import dotenv from 'dotenv'
import { spawn } from 'child_process'

import lang from '@/helpers/lang'
import TcpClient from '@/core/tcp-client'
import server from '@/core/http-server/server'

(async () => {
dotenv.config()

spawn('pipenv run python bridges/python/tcp-server.py', { shell: true, detached: true })
spawn(`pipenv run python bridges/python/tcp-server.py ${lang.getShortCode(process.env.LEON_LANG)}`, { shell: true, detached: true })

global.tcpClient = new TcpClient(
process.env.LEON_PY_WS_SERVER_HOST,
Expand Down

0 comments on commit a808742

Please sign in to comment.