tokenizer.js

A tokenizer written for JavaScript NLP applications

How to install

npm install tokenizer.js

How to declare

import { Tokenizer } from 'tokenizer.js'; // ES6
const Tokenizer = require('tokenizer.js') // Node.js
// Declare a new object
let tokenizer = new Tokenizer(array_of_sentences)
// From JSON
let tokenizer = new Tokenizer()
tokenizer.fromJSON('./path/to/index.json')

How to use

// Returns the generated index (e.g. { 'word1': 0, 'word2': 1, 'word3': 2 }).
tokenizer.index;

// Creates new index and overwrites current index.
tokenizer.create(array_of_sentences);

// Checks whether a word exists in the index.
tokenizer.exists('example');

// Takes a string or a number as argument. If given a string, returns the corresponding number. If given a number, returns number.
tokenizer.find('word'); tokenizer.find(5);

/* Adds word string to index and returns corresponding number. If word exists, returns false.
Numbers are assigned incrementally unless any entries were previously removed;
if so, it takes a number from the empty_slots buffer.
The empty_slots buffer is stored in tokenizer.index.empty_slots */
tokenizer.add('word');

// Removes word entry and stores its number in the empty_slots buffer.
tokenizer.remove('word');

// Sorts index in ascending order by value, takes no arguments.
tokenizer.sort();

// Returns the length of the index, takes no arguments.
tokenizer.indexLength();

// Encodes a string sentence and returns an array of numbers.
tokenizer.encode("this is a sentence"); // -> [223, 554, 420, 73]

// Decodes an array of numbers and returns a string sentence.
tokenizer.decode([223, 554, 420, 73]); // -> "this is a sentence"

// Exports the index to a JSON (.json) file, takes path to file as argument (default: './index.json').
tokenizer.toJSON('./path/to/file.json');

// Imports an index from a JSON (.json) file, takes path to file as argument (default: './index.json').
tokenizer.fromJSON('./path/to/index.json');

Name		Name	Last commit message	Last commit date
Latest commit History 8 Commits
LICENSE		LICENSE
README.md		README.md
tokenizer.js		tokenizer.js

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Repository files navigation

tokenizer.js

How to install

How to declare

How to use

About

Uh oh!

Releases

Packages

Uh oh!

Contributors

Uh oh!

Languages

Folders and files

Latest commit

History

Repository files navigation

tokenizer.js

How to install

How to declare

How to use

About

Resources

License

Uh oh!

Stars

Watchers

Forks

Releases

Packages 0

Uh oh!

Contributors

Uh oh!

Languages

Packages