Skip to content

Commit eb8de4c

Browse files
committed
hxlquickmeta (#9): created bin/hxlquickmeta based on hxl2example
1 parent ad337aa commit eb8de4c

File tree

2 files changed

+401
-0
lines changed

2 files changed

+401
-0
lines changed

bin/hxlquickmeta

Lines changed: 385 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,385 @@
1+
#!/usr/bin/env python3
2+
# ==============================================================================
3+
#
4+
# FILE: hxlquickmeta
5+
#
6+
# USAGE: hxlquickmeta hxlated-data.hxl my-exported-file.example
7+
# cat hxlated-data.hxl | hxlquickmeta > my-exported-file.example
8+
# # Via web, in two different terminals, do it
9+
# hug -f bin/hxlquickmeta
10+
# ngrok http 8000
11+
#
12+
# DESCRIPTION: hxlquickmeta is an example script to create other scripts with
13+
# some bare minimum command line interface that could work.
14+
# With exception of external libraries, the hxlquickmeta is
15+
# meant to be somewhat self-contained one-file executable ready
16+
# to just be added to the path.
17+
#
18+
# Hug API can be used to create an ad-hoc web interface to your
19+
# script. This can be useful if you are using software
20+
# that accepts a URL as data source and you don't want to use
21+
# this script to save a file locally.
22+
#
23+
# OPTIONS: ---
24+
#
25+
# REQUIREMENTS: - python3
26+
# - libhxl (https://pypi.org/project/libhxl/)
27+
# - hug (https://github.com/hugapi/hug/)
28+
# BUGS: ---
29+
# NOTES: ---
30+
# AUTHOR: Emerson Rocha <rocha[at]ieee.org>
31+
# COMPANY: EticaAI
32+
# LICENSE: Public Domain dedication
33+
# SPDX-License-Identifier: Unlicense
34+
# VERSION: v0.6.5
35+
# CREATED: 2021-02-17 03:55 UTC
36+
# REVISION: ---
37+
# ==============================================================================
38+
39+
import sys
40+
import os
41+
import logging
42+
import argparse
43+
44+
# @see https://github.com/HXLStandard/libhxl-python
45+
# pip3 install libhxl --upgrade
46+
# Do not import hxl, to avoid circular imports
47+
import hxl.converters
48+
import hxl.filters
49+
import hxl.io
50+
51+
import tempfile
52+
53+
# @see https://github.com/hugapi/hug
54+
# pip3 install hug --upgrade
55+
import hug
56+
57+
# In Python 2, sys.stdin is a byte stream; in Python 3, it is a text stream,
# so the underlying binary buffer is used as the default input instead.
58+
STDIN = sys.stdin.buffer
59+
60+
61+
class HXLQuickMeta:
    """
    HXLQuickMeta is a class to export already HXLated data in the format
    example.

    Two entry points are provided: execute_cli (command line) and
    execute_web (called by the hug HTTP endpoint).
    """

    def __init__(self):
        """
        Constructs all the necessary attributes for the HXLQuickMeta object.
        """
        # HXLUtils helper; set by make_args_hxlquickmeta() and execute_web(),
        # or lazily by execute_cli() when still unset
        self.hxlhelper = None
        # Parsed command line arguments (set by make_args_hxlquickmeta)
        self.args = None

        # Posix exit codes
        self.EXIT_OK = 0
        self.EXIT_ERROR = 1
        self.EXIT_SYNTAX = 2

    def make_args_hxlquickmeta(self):
        """
        Build the command line parser, parse the process arguments and
        store them on self.args.

        @returns: the parsed arguments
        """
        self.hxlhelper = HXLUtils()
        parser = self.hxlhelper.make_args(
            description=("hxlquickmeta is an example script to create other "
                         "scripts with some bare minimum command line "
                         "interfaces that could work to export HXL files to "
                         "other formats."))

        self.args = parser.parse_args()
        return self.args

    def execute_cli(self, args,
                    stdin=STDIN, stdout=sys.stdout, stderr=sys.stderr):
        """
        The execute_cli is the main entrypoint of HXLQuickMeta. When
        called will convert the HXL source to example format.

        @param args: parsed arguments (as returned by
                     make_args_hxlquickmeta)
        @param stdin: stream used when args has no input file
        @param stdout: stream used when args has no output file
        @param stderr: currently unused
        @returns: self.EXIT_OK
        """

        # Allow calling execute_cli() without make_args_hxlquickmeta()
        # having been called first (the helper would still be None).
        if self.hxlhelper is None:
            self.hxlhelper = HXLUtils()

        # --remove-headers is defined by HXLUtils.make_args; getattr keeps
        # working with argument objects that do not carry the attribute.
        show_headers = not getattr(args, 'remove_headers', False)

        # NOTE: the next lines, in fact, only generate a CSV output. So you
        #       can use as starting point.
        with self.hxlhelper.make_source(args, stdin) as source, \
                self.hxlhelper.make_output(args, stdout) as output:
            hxl.io.write_hxl(output.output, source,
                             show_headers=show_headers,
                             show_tags=not args.strip_tags)

        return self.EXIT_OK

    def execute_web(self, source_url, stdin=STDIN, stdout=sys.stdout,
                    stderr=sys.stderr, hxlmeta=False):
        """
        The execute_web is the main entrypoint of HXLQuickMeta when this
        class is called outside command line interface, like the built-in
        HTTP use with hug.

        @param source_url: URL (or filename) of the HXLated source
        @param stdin: stream passed to the source builder
        @param stdout: currently unused
        @param stderr: currently unused
        @param hxlmeta: currently unused
        @returns: the CSV text generated from the source
        """

        # TODO: the execute_web needs to output the tabfile with correct
        #       mimetype, compression, etc
        #       (fititnt, 2021-02-07 15:59 UTC)

        self.hxlhelper = HXLUtils()

        # Minimal stand-in for the argparse namespace that
        # HXLUtils.make_source reads.
        webargs = type('obj', (object,), {
            "infile": source_url,
            "sheet_index": None,
            "selector": None,
            'sheet': None,
            'http_header': None,
            'ignore_certs': False
        })

        # The CSV is still round-tripped through a text-mode temporary file
        # so the newline handling of the returned text stays as before; the
        # context managers guarantee both handles are closed.
        with tempfile.NamedTemporaryFile('w') as temp_input:
            with self.hxlhelper.make_source(webargs, stdin) as source:
                for line in source.gen_csv(True, True):
                    temp_input.write(line)

            # Make sure everything is on disk before reading it back by name
            temp_input.flush()

            with open(temp_input.name, 'r') as result_file:
                return result_file.read()
149+
150+
151+
class HXLUtils:
    """
    HXLUtils contains functions from the Console scripts of libhxl-python
    (HXLStandard/libhxl-python/blob/master/hxl/scripts.py) with few changes
    to be used as class (and have one single place to change).
    Last update on this class was 2021-01-25.

    Local changes: the --log help no longer advertises a 'none' level that
    the parser rejects, and make_output falls back to the stream when the
    parsed arguments have no 'outfile' attribute.

    Author: David Megginson
    License: Public Domain
    """

    def __init__(self):

        self.logger = logging.getLogger(__name__)

        # Posix exit codes
        self.EXIT_OK = 0
        self.EXIT_ERROR = 1
        self.EXIT_SYNTAX = 2

    def make_args(self, description, hxl_output=True):
        """Set up parser with default arguments.
        @param description: usage description to show
        @param hxl_output: if True (default), include options for HXL output.
        @returns: an argument parser, partly set up.
        """
        parser = argparse.ArgumentParser(description=description)
        parser.add_argument(
            'infile',
            help='HXL file to read (if omitted, use standard input).',
            nargs='?'
        )
        if hxl_output:
            parser.add_argument(
                'outfile',
                help='HXL file to write (if omitted, use standard output).',
                nargs='?'
            )
        parser.add_argument(
            '--sheet',
            help='Select sheet from a workbook (1 is first sheet)',
            metavar='number',
            type=int,
            nargs='?'
        )
        parser.add_argument(
            '--selector',
            help='JSONPath expression for starting point in JSON input',
            metavar='path',
            nargs='?'
        )
        parser.add_argument(
            '--http-header',
            help='Custom HTTP header to send with request',
            metavar='header',
            action='append'
        )
        if hxl_output:
            parser.add_argument(
                '--remove-headers',
                help='Strip text headers from the CSV output',
                action='store_true'
            )
            parser.add_argument(
                '--strip-tags',
                help='Strip HXL tags from the CSV output',
                action='store_true'
            )
        parser.add_argument(
            "--ignore-certs",
            help="Don't verify SSL connections (useful for self-signed)",
            action='store_true'
        )
        # The metavar lists exactly the accepted choices; each of them is
        # upper-cased and handed to logging.basicConfig by do_common_args.
        parser.add_argument(
            '--log',
            help='Set minimum logging level',
            metavar='debug|info|warning|error|critical',
            choices=['debug', 'info', 'warning', 'error', 'critical'],
            default='error'
        )
        return parser

    def add_queries_arg(
        self,
        parser,
        help='Apply only to rows matching at least one query.'
    ):
        """Add the repeatable -q/--query option to a parser.
        @param parser: the argument parser to extend
        @param help: help text shown for the option
        @returns: the same parser
        """
        parser.add_argument(
            '-q',
            '--query',
            help=help,
            metavar='<tagspec><op><value>',
            action='append'
        )
        return parser

    def do_common_args(self, args):
        """Process standard args (configures logging from args.log)"""
        logging.basicConfig(
            format='%(levelname)s (%(name)s): %(message)s',
            level=args.log.upper())

    def make_source(self, args, stdin=STDIN):
        """Create a HXL input source.
        @param args: parsed arguments (see make_input for what is read)
        @param stdin: stream to read when no input file is given
        @returns: the result of hxl.io.data() on the input object
        """

        # construct the input object (named so it does not shadow the
        # builtin input())
        input_object = self.make_input(args, stdin)
        return hxl.io.data(input_object)

    def make_input(self, args, stdin=sys.stdin, url_or_filename=None):
        """Create an input object
        @param args: parsed arguments; reads infile, sheet, selector,
                     http_header and ignore_certs
        @param stdin: stream used when there is no URL or filename
        @param url_or_filename: overrides args.infile when given
        @returns: the result of hxl.io.make_input()
        """

        if url_or_filename is None:
            url_or_filename = args.infile

        # sheet index (the command line is 1-based, libhxl gets 0-based)
        sheet_index = args.sheet
        if sheet_index is not None:
            sheet_index -= 1

        # JSONPath selector
        selector = args.selector

        http_headers = self.make_headers(args)

        return hxl.io.make_input(
            url_or_filename or stdin,
            sheet_index=sheet_index,
            selector=selector,
            allow_local=True,  # TODO: consider change this for execute_web
            http_headers=http_headers,
            verify_ssl=(not args.ignore_certs)
        )

    def make_output(self, args, stdout=sys.stdout):
        """Create an output stream.
        @param args: parsed arguments; 'outfile' is optional
        @param stdout: stream wrapped when no output file is given
        @returns: a FileOutput or a StreamOutput
        """
        # Parsers built with hxl_output=False have no 'outfile' attribute;
        # treat that the same as an omitted output file.
        outfile = getattr(args, 'outfile', None)
        if outfile:
            return FileOutput(outfile)
        return StreamOutput(stdout)

    def make_headers(self, args):
        """Collect custom HTTP headers.

        Headers come from the HXL_HTTP_HEADER environment variable (one
        header) followed by every --http-header option; each string is
        split at its first ':' into name and value.
        @param args: parsed arguments; reads http_header
        @returns: dict mapping header names to values
        """
        header_strings = []
        env_header = os.environ.get("HXL_HTTP_HEADER")
        if env_header is not None:
            header_strings.append(env_header)
        if args.http_header is not None:
            header_strings += args.http_header

        http_headers = {}
        for header in header_strings:
            name, _, value = header.partition(':')
            http_headers[name.strip()] = value.strip()
        return http_headers
311+
312+
313+
class FileOutput(object):
    """
    FileOutput is based on libhxl-python, with a write() method added so
    that it offers the same interface as StreamOutput.
    Last update on this class was 2021-01-25.

    Author: David Megginson
    License: Public Domain
    """

    def __init__(self, filename):
        """Open filename for text writing; the handle is kept on
        self.output and closed when the context exits."""
        self.output = open(filename, 'w')

    def __enter__(self):
        return self

    def __exit__(self, value, type, traceback):
        # NOTE: Python passes (exception type, exception value, traceback)
        #       positionally; the parameter names are kept as they were for
        #       compatibility and none of them is used here.
        self.output.close()

    def write(self, s):
        """Write s to the underlying file (mirrors StreamOutput.write)."""
        self.output.write(s)
330+
331+
332+
class StreamOutput(object):
    """
    StreamOutput is based on libhxl-python with no changes.
    Last update on this class was 2021-01-25.

    Wraps an already open stream so it can be used in a ``with`` statement;
    leaving the context does not close the stream.

    Author: David Megginson
    License: Public Domain
    """

    def __init__(self, output):
        """Keep a reference to the wrapped stream on self.output."""
        self.output = output

    def __enter__(self):
        """Return this wrapper as the context value."""
        return self

    def __exit__(self, value, type, traceback):
        """Leave the wrapped stream untouched (it is not closed here)."""
        return None

    def write(self, s):
        """Forward s to the wrapped stream."""
        stream = self.output
        stream.write(s)
352+
353+
354+
if __name__ == "__main__":

    hxlquickmeta = HXLQuickMeta()
    args = hxlquickmeta.make_args_hxlquickmeta()

    # Hand the exit code returned by execute_cli to the shell instead of
    # discarding it. Definitions placed after this guard are not reached
    # when the file runs as a script.
    sys.exit(hxlquickmeta.execute_cli(args))
360+
361+
362+
@hug.format.content_type('text/csv')
def output_csv(data, response):
    """Hug output formatter used by the CSV endpoint.

    - a dict containing an 'errors' key is returned as JSON (and the
      response content type is switched to application/json);
    - an object with a ``read`` attribute is returned as is;
    - bytes are returned as is;
    - anything else is converted with str() and encoded as UTF-8.
    """
    if isinstance(data, dict) and 'errors' in data:
        response.content_type = 'application/json'
        return hug.output_format.json(data)

    response.content_type = 'text/csv'
    if hasattr(data, "read"):
        return data

    # str() on bytes would produce the "b'...'" representation rather than
    # the payload itself, so pass bytes through untouched.
    if isinstance(data, bytes):
        return data

    return str(data).encode("utf8")
372+
373+
374+
@hug.get('/hxlquickmeta.csv', output=output_csv)
def api_hxl2tab(source_url):
    """hxlquickmeta (@see https://github.com/EticaAI/HXL-Data-Science-file-formats)

    Example:
    http://localhost:8000/hxlquickmeta.csv?source_url=https://docs.google.com/spreadsheets/u/1/d/1l7POf1WPfzgJb-ks4JM86akFSvaZOhAUWqafSJsm3Y4/edit#gid=634938833

    """

    # NOTE: the function name is a leftover from hxl2tab; it is kept so
    #       existing references keep working. The route is /hxlquickmeta.csv.
    return HXLQuickMeta().execute_web(source_url)

0 commit comments

Comments
 (0)