|
| 1 | +#!/usr/bin/env python3 |
| 2 | +# ============================================================================== |
| 3 | +# |
| 4 | +# FILE: hxlquickmeta |
| 5 | +# |
| 6 | +# USAGE: hxlquickmeta hxlated-data.hxl my-exported-file.example |
| 7 | +# cat hxlated-data.hxl | hxlquickmeta > my-exported-file.example |
| 8 | +# # Via web, in two different terminals, do it |
| 9 | +# hug -f bin/hxlquickmeta |
| 10 | +# ngrok http 8000 |
| 11 | +# |
| 12 | +# DESCRIPTION: hxlquickmeta is an example script to create other scripts with |
| 13 | +# some bare minimum command line interface that could work. |
| 14 | +# With exception of external libraries, the hxlquickmeta is |
| 15 | +# meant to be somewhat self-contained one-file executable ready |
| 16 | +# to just be added to the path. |
| 17 | +# |
| 18 | +# Hug API can be used to create an ad-hoc web interface to your |
| 19 | +# script. This can be useful if you are using software that |
| 20 | +# accepts a URL as a data source and you don't want to use |
| 21 | +# this script to save a file locally. |
| 22 | +# |
| 23 | +# OPTIONS: --- |
| 24 | +# |
| 25 | +# REQUIREMENTS: - python3 |
| 26 | +# - libhxl (https://pypi.org/project/libhxl/) |
| 27 | +# - hug (https://github.com/hugapi/hug/) |
| 28 | +# BUGS: --- |
| 29 | +# NOTES: --- |
| 30 | +# AUTHOR: Emerson Rocha <rocha[at]ieee.org> |
| 31 | +# COMPANY: EticaAI |
| 32 | +# LICENSE: Public Domain dedication |
| 33 | +# SPDX-License-Identifier: Unlicense |
| 34 | +# VERSION: v0.6.5 |
| 35 | +# CREATED: 2021-02-17 03:55 UTC |
| 36 | +# REVISION: --- |
| 37 | +# ============================================================================== |
| 38 | + |
| 39 | +import sys |
| 40 | +import os |
| 41 | +import logging |
| 42 | +import argparse |
| 43 | + |
| 44 | +# @see https://github.com/HXLStandard/libhxl-python |
| 45 | +# pip3 install libhxl --upgrade |
| 46 | +# Do not import hxl, to avoid circular imports |
| 47 | +import hxl.converters |
| 48 | +import hxl.filters |
| 49 | +import hxl.io |
| 50 | + |
| 51 | +import tempfile |
| 52 | + |
| 53 | +# @see https://github.com/hugapi/hug |
| 54 | +# pip3 install hug --upgrade |
| 55 | +import hug |
| 56 | + |
# In Python 3, sys.stdin is a text stream, so the underlying binary buffer is
# used as the default input stream. When sys.stdin has been replaced by an
# object that has no ``buffer`` attribute, fall back to that object instead of
# failing with AttributeError at import time.
STDIN = getattr(sys.stdin, "buffer", sys.stdin)
| 59 | + |
| 60 | + |
class HXLQuickMeta:
    """
    HXLQuickMeta is a class to export already HXLated data in the example
    format.

    ``execute_cli`` is the command line entry point and ``execute_web`` is the
    entry point for callers that only have a source URL (for example the hug
    HTTP endpoint).
    """

    def __init__(self):
        """
        Constructs all the necessary attributes for the HXLQuickMeta object.
        """
        # HXLUtils helper; created by make_args_hxlquickmeta(), execute_cli()
        # or execute_web().
        self.hxlhelper = None
        # Parsed command line arguments, set by make_args_hxlquickmeta().
        self.args = None

        # Posix exit codes
        self.EXIT_OK = 0
        self.EXIT_ERROR = 1
        self.EXIT_SYNTAX = 2

    def make_args_hxlquickmeta(self):
        """
        Build the argument parser, parse the process command line and store
        the result on ``self.args``.

        @returns: the parsed arguments.
        """
        self.hxlhelper = HXLUtils()
        parser = self.hxlhelper.make_args(
            description=("hxlquickmeta is an example script to create other "
                         "scripts with some bare minimum command line "
                         "interfaces that could work to export HXL files to "
                         "other formats."))

        self.args = parser.parse_args()
        return self.args

    def execute_cli(self, args,
                    stdin=STDIN, stdout=sys.stdout, stderr=sys.stderr):
        """
        The execute_cli is the main entrypoint of HXLQuickMeta. When
        called will convert the HXL source to example format.

        @param args: parsed arguments, as returned by make_args_hxlquickmeta()
        @param stdin: stream to read from when no input file is given
        @param stdout: stream to write to when no output file is given
        @param stderr: currently unused
        @returns: self.EXIT_OK
        """
        # Allow calling execute_cli() without a previous
        # make_args_hxlquickmeta() call.
        if self.hxlhelper is None:
            self.hxlhelper = HXLUtils()

        # NOTE: the next lines, in fact, only generate a CSV output. So you
        #       can use them as a starting point.
        with self.hxlhelper.make_source(args, stdin) as source, \
                self.hxlhelper.make_output(args, stdout) as output:
            hxl.io.write_hxl(
                output.output, source,
                # Honour --remove-headers; getattr keeps argument objects
                # without that attribute working as before.
                show_headers=not getattr(args, 'remove_headers', False),
                show_tags=not args.strip_tags)

        return self.EXIT_OK

    def execute_web(self, source_url, stdin=STDIN, stdout=sys.stdout,
                    stderr=sys.stderr, hxlmeta=False):
        """
        The execute_web is the main entrypoint of HXLQuickMeta when this class
        is called outside the command line interface, like the built-in HTTP
        use with hug.

        @param source_url: URL or filename of the HXLated data
        @param stdin: stream handed to the input helper
        @param stdout: currently unused
        @param stderr: currently unused
        @param hxlmeta: currently unused
        @returns: the source serialized as CSV text (headers and HXL tags
                  included)
        """

        # TODO: the execute_web needs to output the tabfile with correct
        #       mimetype, compression, etc
        #       (fititnt, 2021-02-07 15:59 UTC)

        self.hxlhelper = HXLUtils()

        # Minimal stand-in for the parsed command line arguments.
        webargs = type('obj', (object,), {
            "infile": source_url,
            "sheet_index": None,
            "selector": None,
            'sheet': None,
            'http_header': None,
            'ignore_certs': False
        })

        # The text is round-tripped through a text-mode temporary file, as
        # before, so line endings are translated the same way on read. The
        # context managers guarantee every handle is closed on any exit path.
        with tempfile.TemporaryFile('w+') as buffer:
            with self.hxlhelper.make_source(webargs, stdin) as source:
                for line in source.gen_csv(True, True):
                    buffer.write(line)

            buffer.seek(0)
            return buffer.read()
| 149 | + |
| 150 | + |
class HXLUtils:
    """
    HXLUtils contains functions from the Console scripts of libhxl-python
    (HXLStandard/libhxl-python/blob/master/hxl/scripts.py) with few changes
    to be used as class (and have one single place to change).
    Last update on this class was 2021-01-25.

    Author: David Megginson
    License: Public Domain
    """

    def __init__(self):
        """Set up the logger and the exit codes."""

        self.logger = logging.getLogger(__name__)

        # Posix exit codes
        self.EXIT_OK = 0
        self.EXIT_ERROR = 1
        self.EXIT_SYNTAX = 2

    def make_args(self, description, hxl_output=True):
        """Set up parser with default arguments.
        @param description: usage description to show
        @param hxl_output: if True (default), include options for HXL output.
        @returns: an argument parser, partly set up.
        """
        parser = argparse.ArgumentParser(description=description)
        parser.add_argument(
            'infile',
            help='HXL file to read (if omitted, use standard input).',
            nargs='?'
        )
        if hxl_output:
            parser.add_argument(
                'outfile',
                help='HXL file to write (if omitted, use standard output).',
                nargs='?'
            )
        parser.add_argument(
            '--sheet',
            help='Select sheet from a workbook (1 is first sheet)',
            metavar='number',
            type=int,
            nargs='?'
        )
        parser.add_argument(
            '--selector',
            help='JSONPath expression for starting point in JSON input',
            metavar='path',
            nargs='?'
        )
        parser.add_argument(
            '--http-header',
            help='Custom HTTP header to send with request',
            metavar='header',
            action='append'
        )
        if hxl_output:
            parser.add_argument(
                '--remove-headers',
                help='Strip text headers from the CSV output',
                action='store_const',
                const=True,
                default=False
            )
            parser.add_argument(
                '--strip-tags',
                help='Strip HXL tags from the CSV output',
                action='store_const',
                const=True,
                default=False
            )
        parser.add_argument(
            "--ignore-certs",
            help="Don't verify SSL connections (useful for self-signed)",
            action='store_const',
            const=True,
            default=False
        )
        parser.add_argument(
            '--log',
            help='Set minimum logging level',
            # The metavar lists exactly the accepted choices ("none" used to
            # be advertised here but was always rejected by ``choices``).
            metavar='debug|info|warning|error|critical',
            choices=['debug', 'info', 'warning', 'error', 'critical'],
            default='error'
        )
        return parser

    def add_queries_arg(
        self,
        parser,
        help='Apply only to rows matching at least one query.'
    ):
        """Add the repeatable -q/--query option to a parser.
        @param parser: the argument parser to extend
        @param help: help text to show for the option
        @returns: the same parser
        """
        parser.add_argument(
            '-q',
            '--query',
            help=help,
            metavar='<tagspec><op><value>',
            action='append'
        )
        return parser

    def do_common_args(self, args):
        """Process standard args"""
        logging.basicConfig(
            format='%(levelname)s (%(name)s): %(message)s',
            level=args.log.upper())

    def make_source(self, args, stdin=STDIN):
        """Create a HXL input source."""

        # construct the input object
        input_object = self.make_input(args, stdin)
        return hxl.io.data(input_object)

    def make_input(self, args, stdin=sys.stdin, url_or_filename=None):
        """Create an input object
        @param args: parsed arguments (infile, sheet, selector, http_header
                     and ignore_certs are read)
        @param stdin: stream used when no URL or filename is available
        @param url_or_filename: explicit source; defaults to args.infile
        @returns: the result of hxl.io.make_input()
        """

        if url_or_filename is None:
            url_or_filename = args.infile

        # sheet index: the command line is 1-based, the library call is
        # given the value minus one
        sheet_index = args.sheet
        if sheet_index is not None:
            sheet_index -= 1

        # JSONPath selector
        selector = args.selector

        http_headers = self.make_headers(args)

        return hxl.io.make_input(
            url_or_filename or stdin,
            sheet_index=sheet_index,
            selector=selector,
            allow_local=True,  # TODO: consider change this for execute_web
            http_headers=http_headers,
            verify_ssl=(not args.ignore_certs)
        )

    def make_output(self, args, stdout=sys.stdout):
        """Create an output stream."""
        if args.outfile:
            return FileOutput(args.outfile)
        else:
            return StreamOutput(stdout)

    def make_headers(self, args):
        """Collect custom HTTP headers.

        Headers come from the HXL_HTTP_HEADER environment variable (first)
        and from args.http_header. Each entry is split on its first ':'.

        @param args: parsed arguments (http_header is read)
        @returns: dict mapping header name to header value
        """
        header_strings = []
        env_header = os.environ.get("HXL_HTTP_HEADER")
        if env_header is not None:
            header_strings.append(env_header)
        if args.http_header is not None:
            header_strings += args.http_header

        http_headers = {}
        for header_string in header_strings:
            name, _, value = header_string.partition(':')
            name = name.strip()
            if not name:
                # A blank entry (e.g. HXL_HTTP_HEADER set to an empty string)
                # would otherwise produce a header with an empty name.
                continue
            http_headers[name] = value.strip()
        return http_headers
| 311 | + |
| 312 | + |
class FileOutput(object):
    """
    FileOutput is based on libhxl-python. It opens a file for writing and
    closes it when the ``with`` block ends.

    Author: David Megginson
    License: Public Domain
    """

    def __init__(self, filename):
        # newline='' writes line endings exactly as given, so CSV rows that
        # already end in '\r\n' are not translated a second time on platforms
        # whose native line separator is not '\n'.
        self.output = open(filename, 'w', newline='')

    def __enter__(self):
        return self

    # NOTE: Python passes (exception type, exception value, traceback)
    #       positionally; the parameter names are kept as they were for
    #       interface stability even though they do not match that order.
    def __exit__(self, value, type, traceback):
        self.output.close()

    def write(self, s):
        """Write text to the file (same method StreamOutput offers)."""
        self.output.write(s)
| 330 | + |
| 331 | + |
class StreamOutput:
    """
    StreamOutput is based on libhxl-python with no changes.
    It wraps an already open stream so it can be used in a ``with`` block;
    the stream is not closed when the block ends.
    Last update on this class was 2021-01-25.

    Author: David Megginson
    License: Public Domain
    """

    def __init__(self, output):
        # The wrapped stream; callers also reach it directly as ``.output``.
        self.output = output

    def __enter__(self):
        return self

    def __exit__(self, value, type, traceback):
        # Nothing to release: the stream is left open.
        return None

    def write(self, s):
        """Forward ``s`` to the wrapped stream."""
        stream = self.output
        stream.write(s)
| 352 | + |
| 353 | + |
if __name__ == "__main__":

    hxlquickmeta = HXLQuickMeta()
    args = hxlquickmeta.make_args_hxlquickmeta()

    # Hand the status returned by execute_cli() to the shell instead of
    # discarding it.
    sys.exit(hxlquickmeta.execute_cli(args))
| 360 | + |
| 361 | + |
@hug.format.content_type('text/csv')
def output_csv(data, response):
    """Hug output formatter that serves the endpoint result as CSV.

    @param data: value returned by the endpoint
    @param response: response object whose content_type is set here
    @returns: JSON output when ``data`` is a dict with an 'errors' key;
              ``data`` itself when it has a ``read`` attribute or is already
              bytes; otherwise ``str(data)`` encoded as UTF-8.
    """
    if isinstance(data, dict) and 'errors' in data:
        response.content_type = 'application/json'
        return hug.output_format.json(data)

    response.content_type = 'text/csv'
    if hasattr(data, "read"):
        return data
    if isinstance(data, bytes):
        # Already encoded: str() would turn it into its "b'...'" repr.
        return data

    return str(data).encode("utf8")
| 372 | + |
| 373 | + |
@hug.get('/hxlquickmeta.csv', output=output_csv)
def api_hxl2tab(source_url):
    """hxlquickmeta (@see https://github.com/EticaAI/HXL-Data-Science-file-formats)

    Example:
    http://localhost:8000/hxlquickmeta.csv?source_url=https://docs.google.com/spreadsheets/u/1/d/1l7POf1WPfzgJb-ks4JM86akFSvaZOhAUWqafSJsm3Y4/edit#gid=634938833

    """

    return HXLQuickMeta().execute_web(source_url)
0 commit comments