Skip to content

Commit 0b99ac7

Browse files
committed
add script to create json and update requirements
1 parent 92f1a24 commit 0b99ac7

File tree

2 files changed

+273
-1
lines changed

2 files changed

+273
-1
lines changed

i18n_create_json.py

Lines changed: 268 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,268 @@
1+
"""
2+
Create a JSON file for the internationalization process.
3+
4+
Script to create json file using data-i18n attributes
5+
inside html files for the internationalization process
6+
"""
7+
8+
import json
9+
import click
10+
import re
11+
import glob
12+
from pathlib import Path
13+
from bs4 import BeautifulSoup
14+
from tabulate import tabulate
15+
from colorama import Fore, Style
16+
from collections import OrderedDict, Counter
17+
18+
19+
def read_file(filename):
    """Read a text file and return its content as a string.

    The file is decoded explicitly as UTF-8 instead of relying on the
    platform's default encoding, so html files containing non-ASCII text
    are read the same way on every system.

    :param filename: path of the file to read (str or path-like).
    :return: the whole content of the file as a ``str``.
    """
    with open(filename, "r", encoding="utf-8") as fstream:
        return fstream.read()
24+
25+
26+
def write_file(filename, content):
    """Write a string into a file, replacing any previous content.

    The text is encoded explicitly as UTF-8 instead of relying on the
    platform's default encoding.

    :param filename: path of the file to write (str or path-like).
    :param content: text to store in the file.
    """
    with open(filename, "w", encoding="utf-8") as f:
        f.write(content)
30+
31+
32+
def read_json(filename):
    """Read a json file and return its parsed content.

    The file is decoded explicitly as UTF-8 (the encoding JSON files are
    expected to use) instead of the platform's default encoding.

    :param filename: path of the json file (str or path-like).
    :return: the decoded json document (a dict for translation files).
    :raises json.JSONDecodeError: if the file is not valid json.
    """
    with open(filename, "r", encoding="utf-8") as f:
        return json.load(f)
37+
38+
39+
def write_json(filename, data):
    """Serialize ``data`` as json (4-space indent) into ``filename``.

    Any previous content of the file is replaced.
    """
    with open(filename, "w") as stream:
        json.dump(data, stream, indent=4)
43+
44+
45+
def show_table(datadict, color, fmt="pretty"):
    """Print ``datadict`` as a colored table.

    The color code is printed first, then the table rendered by tabulate
    (dict keys are used as headers, ``fmt`` as table format) and finally
    the colorama reset sequence.
    """
    print(color)
    rendered = tabulate(datadict, tablefmt=fmt, headers="keys")
    print(rendered)
    print(Style.RESET_ALL)
50+
51+
52+
def print_info(datadict, color, title=''):
    """Print ``datadict`` as indented json inside a colored frame.

    The output starts with the color code, followed by a dashed rule with
    ``title`` centered in it, the json dump (4-space indent), a closing
    dashed rule and the colorama reset sequence.
    """
    print(color)
    header = f"+{title:-^60s}+"
    print(header)
    dumped = json.dumps(datadict, indent=4)
    print(dumped)
    footer = f"+{'':-^60s}+"
    print(footer)
    print(Style.RESET_ALL)
59+
60+
61+
def create_json_i18n(filename, json_content, verbose=False):
    """Add the data-i18n keys found in an html file to ``json_content``.

    Search for the data-i18n attribute inside the html content and
    generate/update the translation dict following the banana format.

    :param filename: path of the html file to scan.
    :param json_content: dict of translations; it is updated in place.
        Every key found in the html that is missing (or has an empty
        value) is set to the empty string.
    :param verbose: when true, print a table with the file name, the keys
        that were already present and the keys added by this call.
    :return: None, the result is the mutation of ``json_content``.
    """
    soup = BeautifulSoup(read_file(filename), 'html.parser')
    # Snapshot taken before the update, to be able to report the new keys.
    oldfields = set(json_content)

    for tag in soup.find_all(attrs={"data-i18n": True}):
        key = tag.get("data-i18n")
        # An empty attribute (data-i18n="") would create a blank key in the
        # json file, which cannot be translated: ignore it.
        if not key.strip():
            continue
        if not json_content.get(key):
            json_content[key] = ''

    if verbose:
        newfields = {key for key in json_content if key not in oldfields}
        # Sorted lists (instead of sets) make the table rows deterministic.
        show_table({
            "file": [Path(filename).name],
            "existing fields": sorted(oldfields),
            "new fields": sorted(newfields),
        }, Fore.YELLOW)
86+
87+
88+
def normalize(name):
    """Return ``name`` with every underscore turned into a hyphen.

    Used as click's token normalize function so commands can be typed
    with underscores.
    """
    pieces = name.split("_")
    return "-".join(pieces)
91+
92+
93+
@click.group(context_settings={"token_normalize_func": normalize})
def cli():
    """Create/update json for internationalization.

    This program allows you to create or update a <lang>.json file
    for an internationalization process, using the banana format. You
    can look for the data-i18n attribute in one or several html files at
    a time and extract them to create/update the json file. You can also
    check whether there are duplicated attributes in the html files before
    putting them in the json file.

    To show help for a specific command, you can run:

    python i18n_create_json.py COMMAND --help
    """
    # Nothing to do here: the group only dispatches to its subcommands.
    # The docstring above is what click prints for --help.
110+
111+
112+
@cli.command()
@click.option('-f', "--file", required=True,
              type=click.Path(exists=True, dir_okay=False),
              help="to pass the html file which it will be scanned")
@click.option('--output', '-o', required=True,
              help="To give the name of the output json")
@click.option('-i', "--inplace", is_flag=True,
              help="To create/update the file. Without this option, the "
                   "command is executed in a dry-run mode")
@click.option('-v', "--verbose", is_flag=True,
              help="To show more detailed information about the process")
def onefile(**kwargs):
    """To search all data-i18n attributes inside one html file.

    This command allows you look for all data-i18n attributes inside one html
    file passed by command line with the option -f/--file and create or update
    a json file with these attributes following the banana format.

    The json file is looked up (and written) in the static/i18n directory
    located next to the directory that contains the html file, using the
    name given with -o/--output.


    How to use:

    1. To execute in dry-run mode

    $ python i18n_create_json.py onefile --file="path/to/file.html"
    -o output.json


    2. To execute and replace in-place

    $ python i18n_create_json.py onefile --file="path/to/file.html"
    -o output.json -i/--inplace

    """
    # --file and --output are declared as required (and --file must exist):
    # without them Path() below would fail with a TypeError traceback
    # instead of a click usage error.
    filename = kwargs['file']
    verbose = kwargs["verbose"]

    outfile = Path(filename).parent.parent / "static/i18n" / kwargs['output']

    # Start from the current translations when the json file already exists.
    trfile_content = read_json(outfile) if outfile.exists() else {}

    # "@metadata" is set aside so it is not sorted with the message keys
    # and stays as the first entry; it is None when the file had none.
    metadata = {"@metadata": trfile_content.pop("@metadata", None)}
    create_json_i18n(filename, trfile_content, verbose)
    trfile_content = {
        **metadata,
        **OrderedDict(sorted(trfile_content.items()))
    }

    if kwargs["inplace"]:
        write_json(outfile, trfile_content)
    else:
        # Dry-run: only show what would be written.
        print_info(
            trfile_content,
            Fore.LIGHTGREEN_EX,
            title=f"New content for {kwargs['output']}"
        )
166+
167+
168+
@cli.command()
@click.option('-p', "--pattern", required=True,
              help="To pass the html files using unix wildcards")
@click.option('--output', '-o', required=True,
              help="To give the name of the output json")
@click.option('-i', "--inplace", is_flag=True,
              help="To create/update the file. Without this option, the "
                   "command is executed in a dry-run mode")
@click.option('-v', "--verbose", is_flag=True,
              help="To show more detailed information about the process")
def severalfiles(**kwargs):
    """To search all data-i18n attributes inside several html files.

    This command allows you look for all data-i18n attributes inside several
    html files passed by command line with the option -p/--pattern as a
    pattern. You can use the bash wildcards. With this pattern, you can create
    or update the json file with these attributes following the banana format.

    The json file is looked up (and written) in the static/i18n directory
    located next to the directory part of the pattern, using the name given
    with -o/--output.


    How to use:

    1. To execute in dry-run mode

    $ python i18n_create_json.py severalfiles --pattern="path/to/file*.html"
    -o output.json


    2. To execute and replace in-place

    $ python i18n_create_json.py severalfiles --pattern="path/to/file*.html"
    -o output.json -i/--inplace

    """
    # --pattern and --output are declared as required: without them Path()
    # below would fail with a TypeError traceback instead of a usage error.
    pattern = kwargs['pattern']
    verbose = kwargs["verbose"]

    outfile = Path(pattern).parent.parent / "static/i18n" / kwargs['output']

    # Start from the current translations when the json file already exists.
    trfile_content = read_json(outfile) if outfile.exists() else {}

    # "@metadata" is set aside so it is not sorted with the message keys
    # and stays as the first entry; it is None when the file had none.
    metadata = {"@metadata": trfile_content.pop("@metadata", None)}

    # glob returns the matches in arbitrary order; sort them so the verbose
    # report is the same on every run.
    for file in sorted(glob.glob(pattern)):
        create_json_i18n(file, trfile_content, verbose)

    trfile_content = {
        **metadata,
        **OrderedDict(sorted(trfile_content.items()))
    }

    if kwargs["inplace"]:
        write_json(outfile, trfile_content)
    else:
        # Dry-run: only show what would be written.
        print_info(
            trfile_content,
            Fore.LIGHTGREEN_EX,
            title=f"New content for {kwargs['output']}"
        )
225+
226+
227+
@cli.command()
@click.option('--path', required=True,
              help="To pass the html files using unix wildcards")
def check_duplicates(**kwargs):
    """To look for data-i18n attributes duplicated.

    This command allows you look for all duplicated data-i18n attributes
    inside several html files passed by command line with the option
    --path as a pattern.

    How to use:

    1. To show duplicated data-i18n attributes

    $ python i18n_create_json.py check_duplicates
    --path="path/to/file*.html"

    """
    # Sorted so the files are always processed in the same order.
    files = sorted(glob.glob(kwargs["path"]))

    # The attributes are collected with the same html parser used to build
    # the json file, so both commands agree on which keys exist (a plain
    # regex on double quotes misses single-quoted attributes, for example).
    occurrences = Counter()
    for file in files:
        soup = BeautifulSoup(read_file(file), 'html.parser')
        occurrences.update(
            tag.get("data-i18n")
            for tag in soup.find_all(attrs={"data-i18n": True})
        )

    # A key is a duplicate when it appears more than once across all files.
    duplicates = [key for key, val in occurrences.items() if val > 1]

    show_table(
        {
            "KEY DUPLICATES": duplicates if duplicates
            else ["There are not duplicated keys"]
        },
        color=Fore.LIGHTRED_EX,
        fmt="simple"
    )
265+
266+
267+
if __name__ == "__main__":
    # Run the click command group only when executed as a script.
    cli()

requirements.txt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,8 @@ requests
88
simplejson
99
werkzeug>=0.9
1010
urllib3>=1.25.1
11-
feedparser
11+
feedparser
12+
colorama
13+
bs4
14+
tabulate
15+
click

0 commit comments

Comments
 (0)