Created
January 5, 2019 09:04
-
-
Save kojiisd/2c8a48548427522987fd9c41d7f11238 to your computer and use it in GitHub Desktop.
Amazon TranslateにどうにかExcelを翻訳させてみた ref: https://qiita.com/kojiisd/items/fd790717084bbca51930
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ pip install xlrd
# 読み込み用
$ pip install xlwt
$ pip install openpyxl
# 書き込み用
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)), 'libs')) | |
import pandas as pd | |
import openpyxl | |
def main():
    """Copy the 'test' sheet of data.xls into a new workbook.

    The sheet is loaded without a header row and written to
    pandas_to_excel.xlsx as sheet 'new_test', omitting both the index and
    the column labels.
    """
    # Load the sheet; header=None keeps the first row as ordinary data.
    sheet = pd.read_excel('data.xls', header=None, sheet_name='test')

    # Dump the frame back out with neither index nor header cells.
    sheet.to_excel(
        'pandas_to_excel.xlsx',
        header=False,
        index=False,
        sheet_name='new_test',
    )


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import boto3 | |
# Region name handed to the boto3 Translate client.
REGION = 'us-east-1'

# Language codes used as the source and target of translate_text requests.
SRC_LANG = 'ja'
DIST_LANG = 'en'

# Module-level Amazon Translate client (SSL explicitly enabled).
translate = boto3.client('translate', region_name=REGION, use_ssl=True)
def translate_df(df):
    """Translate every non-empty string cell of ``df`` with Amazon Translate.

    Each cell that is a non-empty ``str`` is sent to ``translate_text`` with
    ``SRC_LANG`` as the source language and ``DIST_LANG`` as the target
    language, and is replaced by the ``TranslatedText`` field of the
    response. Every other cell (numbers, empty strings, missing values, ...)
    is passed through unchanged.

    Args:
        df: pandas DataFrame whose cells should be translated.

    Returns:
        A new DataFrame holding the element-wise translated cells.
    """
    # Named ``client`` so it does not shadow a module-level ``translate``.
    client = boto3.client('translate', region_name=REGION)

    # Spreadsheets tend to repeat the same labels; remember each distinct
    # text so it costs only one API request.
    translated_by_text = {}

    def translate_cell(cell):
        # Check the type first so non-string cells never reach the string
        # comparison; empty strings are skipped rather than sent to the API.
        if not isinstance(cell, str) or cell == '':
            return cell
        if cell not in translated_by_text:
            reply = client.translate_text(
                Text=cell,
                SourceLanguageCode=SRC_LANG,
                TargetLanguageCode=DIST_LANG,
            )
            translated_by_text[cell] = reply['TranslatedText']
        return translated_by_text[cell]

    # DataFrame.applymap is deprecated since pandas 2.1 in favour of
    # DataFrame.map. Probe the class, not the instance, so that a column
    # labelled "map" on an older pandas is not mistaken for the method.
    if hasattr(type(df), 'map'):
        return df.map(translate_cell)
    return df.applymap(translate_cell)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "TranslatedText": "Unit Test Specifications and Report",
  "SourceLanguageCode": "ja",
  "TargetLanguageCode": "en",
  "ResponseMetadata": {
    "RequestId": "daabb017-10c4-11e9-b617-d373f8e7a2f8",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Sat, 05 Jan 2019 08:35:28 GMT",
      "x-amzn-requestid": "daabb017-10c4-11e9-b617-d373f8e7a2f8",
      "content-length": "108",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)), 'libs')) | |
import pandas as pd | |
import openpyxl | |
import boto3 | |
# Region name handed to the boto3 Translate client.
REGION = 'us-east-1'

# Language codes used as the source and target of translate_text requests.
SRC_LANG = 'ja'
DIST_LANG = 'en'

# Module-level Amazon Translate client (SSL explicitly enabled).
translate = boto3.client('translate', region_name=REGION, use_ssl=True)
def translate_df(df):
    """Translate every non-empty string cell of ``df`` with Amazon Translate.

    Each cell that is a non-empty ``str`` is sent to ``translate_text`` with
    ``SRC_LANG`` as the source language and ``DIST_LANG`` as the target
    language, and is replaced by the ``TranslatedText`` field of the
    response. Every other cell (numbers, empty strings, missing values, ...)
    is passed through unchanged.

    Args:
        df: pandas DataFrame whose cells should be translated.

    Returns:
        A new DataFrame holding the element-wise translated cells.
    """
    # Named ``client`` so it does not shadow a module-level ``translate``.
    client = boto3.client('translate', region_name=REGION)

    # Spreadsheets tend to repeat the same labels; remember each distinct
    # text so it costs only one API request.
    translated_by_text = {}

    def translate_cell(cell):
        # Check the type first so non-string cells never reach the string
        # comparison; empty strings are skipped rather than sent to the API.
        if not isinstance(cell, str) or cell == '':
            return cell
        if cell not in translated_by_text:
            reply = client.translate_text(
                Text=cell,
                SourceLanguageCode=SRC_LANG,
                TargetLanguageCode=DIST_LANG,
            )
            translated_by_text[cell] = reply['TranslatedText']
        return translated_by_text[cell]

    # DataFrame.applymap is deprecated since pandas 2.1 in favour of
    # DataFrame.map. Probe the class, not the instance, so that a column
    # labelled "map" on an older pandas is not mistaken for the method.
    if hasattr(type(df), 'map'):
        return df.map(translate_cell)
    return df.applymap(translate_cell)
def main():
    """Translate the 'test' sheet of data.xls and save it as a new workbook.

    The sheet is loaded without a header row, missing cells are replaced by
    empty strings, the frame is passed through translate_df, and the result
    is written to pandas_to_excel.xlsx as sheet 'new_test' without index or
    column labels.
    """
    # Load the sheet; header=None keeps the first row as ordinary data.
    source = pd.read_excel('data.xls', header=None, sheet_name='test')

    # Swap missing values for '' before handing the frame to the translator.
    translated = translate_df(source.fillna(''))

    # Persist the translated frame with neither index nor header cells.
    translated.to_excel(
        'pandas_to_excel.xlsx',
        header=False,
        index=False,
        sheet_name='new_test',
    )


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment