| | """ Usage: |
| | <file-name> --in=INPUT_FILE --out=OUTPUT_FILE [--debug] |
| | |
| | Convert to tabbed format |
| | """ |
| | |
| | import logging |
| | from pprint import pprint |
| | from pprint import pformat |
| | from docopt import docopt |
| |
|
| | |
| | from oie_readers.oieReader import OieReader |
| | from oie_readers.extraction import Extraction |
| | import ipdb |
| |
|
| | |
| |
|
class OpenieFourReader(OieReader):
    """Reader for the tab-separated output produced by OpenIE-4.

    Each usable input line is expected to hold at least six tab-separated
    fields, of which this reader uses:

    * field 0    -- the extraction confidence (parsed with ``float``),
    * fields 2-4 -- first argument, relation and second argument, each
      wrapped as ``<prefix>(<text>,List(...`` so that the text sits between
      the first ``(`` and the first ``,List(``,
    * field 5    -- the sentence the extraction was made from.

    Field 1 is ignored.
    """

    def __init__(self):
        self.name = 'OpenIE-4'

    def read(self, fn):
        """Parse the file *fn* and store the extractions in ``self.oie``.

        ``self.oie`` becomes a dict mapping each sentence to the list of
        ``Extraction`` objects found for it, in file order.

        Lines that have fewer than six fields (for instance blank lines) or
        whose argument/relation fields are empty are skipped and reported at
        DEBUG level.

        Raises:
            ValueError: if a non-empty argument/relation field lacks the
                ``(`` ... ``,List(`` wrapper, or the confidence is not a
                valid float.
        """
        d = {}
        with open(fn) as fin:
            for line in fin:
                data = line.strip().split('\t')
                # Guard against short lines first: without the length check a
                # blank line passes ``all([])`` and then fails on unpacking.
                if len(data) < 6 or not all(data[2:5]):
                    logging.debug("Skipped line: {}".format(line))
                    continue
                confidence = data[0]
                # Keep only the text between the first '(' and ',List('.
                arg1, rel, arg2 = [s[s.index('(') + 1:s.index(',List(')] for s in data[2:5]]
                text = data[5]
                curExtraction = Extraction(pred=rel, head_pred_index=-1, sent=text, confidence=float(confidence))
                curExtraction.addArg(arg1)
                curExtraction.addArg(arg2)
                # Append in place instead of rebuilding the list on every hit.
                d.setdefault(text, []).append(curExtraction)
        self.oie = d
| |
|
| |
|
| |
|
if __name__ == "__main__":
    # Parse the command line according to the usage text in the module docstring.
    cli = docopt(__doc__)
    source_path, target_path = cli["--in"], cli["--out"]

    # --debug raises the root logger verbosity from INFO to DEBUG.
    logging.basicConfig(level=logging.DEBUG if cli["--debug"] else logging.INFO)

    # Read the OpenIE-4 file and write it back out in tabbed format.
    reader = OpenieFourReader()
    reader.read(source_path)
    reader.output_tabbed(target_path)

    logging.info("DONE")
| |
|