{
  "_id": "6a1efedeb401979e7341a872",
  "Package": "udpipe",
  "Type": "Package",
  "Title": "Tokenization, Parts of Speech Tagging, Lemmatization and\nDependency Parsing with the 'UDPipe' 'NLP' Toolkit",
  "Version": "0.8.16",
  "Maintainer": "Jan Wijffels <jwijffels@bnosac.be>",
  "Authors@R": "c(\nperson('Jan', 'Wijffels', role = c('aut', 'cre', 'cph'), email = 'jwijffels@bnosac.be', comment = 'R wrapper'),\nperson('BNOSAC', role = 'cph', comment = 'R wrapper'),\nperson(\"Institute of Formal and Applied Linguistics, Faculty of Mathematics and Physics, Charles University in Prague, Czech Republic\", role = 'cph', comment = \"src/udpipe.cpp & src/udpipe.h\"),\nperson('Milan Straka', role = c('aut', 'cph'), email = 'straka@ufal.mff.cuni.cz', comment = \"src/udpipe.cpp & src/udpipe.h\"),\nperson('Jana Straková', role = c('ctb', 'cph'), email = 'strakova@ufal.mff.cuni.cz', comment = \"src/udpipe.cpp & src/udpipe.h\"))",
  "Description": "This natural language processing toolkit provides\nlanguage-agnostic 'tokenization', 'parts of speech tagging',\n'lemmatization' and 'dependency parsing' of raw text. Next to\ntext parsing, the package also allows you to train annotation\nmodels based on data of 'treebanks' in 'CoNLL-U' format as\nprovided at <https://universaldependencies.org/format.html>.\nThe techniques are explained in detail in the paper:\n'Tokenizing, POS Tagging, Lemmatizing and Parsing UD 2.0 with\nUDPipe', available at <doi:10.18653/v1/K17-3009>. The toolkit\nalso contains functionalities for commonly used data\nmanipulations on texts which are enriched with the output of\nthe parser. Namely functionalities and algorithms for\ncollocations, token co-occurrence, document term matrix\nhandling, term frequency inverse document frequency\ncalculations, information retrieval metrics (Okapi BM25),\nhandling of multi-word expressions, keyword detection (Rapid\nAutomatic Keyword Extraction, noun phrase extraction,\nsyntactical patterns), sentiment scoring and semantic similarity\nanalysis.",
  "License": "MPL-2.0",
  "URL": "https://bnosac.github.io/udpipe/en/index.html,\nhttps://github.com/bnosac/udpipe",
  "Encoding": "UTF-8",
  "VignetteBuilder": "knitr",
  "RoxygenNote": "7.3.2",
  "Repository": "https://bnosac.r-universe.dev",
  "Date/Publication": "2026-01-30 08:52:30 UTC",
  "RemoteUrl": "https://github.com/bnosac/udpipe",
  "RemoteRef": "HEAD",
  "RemoteSha": "3e8572cd7e6ee58b22cc30d29210a5f656b3b535",
  "NeedsCompilation": "yes",
  "Packaged": {
    "Date": "2026-05-18 09:25:18 UTC",
    "User": "root"
  },
  "Author": "Jan Wijffels [aut, cre, cph] (R wrapper),\nBNOSAC [cph] (R wrapper),\nInstitute of Formal and Applied Linguistics, Faculty of Mathematics and\nPhysics, Charles University in Prague, Czech Republic [cph]\n(src/udpipe.cpp & src/udpipe.h),\nMilan Straka [aut, cph] (src/udpipe.cpp & src/udpipe.h),\nJana Straková [ctb, cph] (src/udpipe.cpp & src/udpipe.h)",
  "MD5sum": "29f1ec16e7a9cdb1be7cfd279aad87e3",
  "_user": "bnosac",
  "_type": "src",
  "_file": "udpipe_0.8.16.tar.gz",
  "_fileid": "b75e2b1028929a983e8a722f7aecacf22e71ee0519506ff598674910326f085b",
  "_filesize": 3899846,
  "_sha256": "b75e2b1028929a983e8a722f7aecacf22e71ee0519506ff598674910326f085b",
  "_created": "2026-05-18T09:25:18.000Z",
  "_published": "2026-06-02T16:03:42.680Z",
  "_distro": "noble",
  "_jobs": [
    {
      "job": 79115420726,
      "time": 210,
      "config": "linux-devel-arm64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7054178414"
    },
    {
      "job": 79115420411,
      "time": 255,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7054194112"
    },
    {
      "job": 79115420668,
      "time": 215,
      "config": "linux-release-arm64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7054179626"
    },
    {
      "job": 79115420666,
      "time": 218,
      "config": "linux-release-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7054181020"
    },
    {
      "job": 79115420852,
      "time": 182,
      "config": "macos-oldrel-arm64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7054167489"
    },
    {
      "job": 79115420711,
      "time": 290,
      "config": "macos-oldrel-x86_64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7054206646"
    },
    {
      "job": 79115420191,
      "time": 210,
      "config": "macos-release-arm64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7054177803"
    },
    {
      "job": 79115419703,
      "time": 512,
      "config": "macos-release-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7054285696"
    },
    {
      "job": 79115420342,
      "time": 374,
      "config": "source",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7054103803"
    },
    {
      "job": 79115419350,
      "time": 167,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7363158393"
    },
    {
      "job": 79115420753,
      "time": 287,
      "config": "windows-devel",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7054206317"
    },
    {
      "job": 79115419850,
      "time": 242,
      "config": "windows-oldrel",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7054189689"
    },
    {
      "job": 79115420331,
      "time": 247,
      "config": "windows-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7054190722"
    }
  ],
  "_buildurl": "https://github.com/r-universe/bnosac/actions/runs/26024662138",
  "_status": "success",
  "_host": "GitHub-Actions",
  "_upstream": "https://github.com/bnosac/udpipe",
  "_commit": {
    "id": "3e8572cd7e6ee58b22cc30d29210a5f656b3b535",
    "author": "jwijffels <jwijffels@bnosac.be>",
    "committer": "GitHub <noreply@github.com>",
    "message": "explicit c++20 (#136)\n\n* persistent_unordered_map_fnv\n",
    "time": 1769763150
  },
  "_maintainer": {
    "name": "Jan Wijffels",
    "email": "jwijffels@bnosac.be",
    "login": "jwijffels",
    "description": "www.bnosac.be",
    "uuid": 1710810
  },
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 2.10",
      "role": "Depends"
    },
    {
      "package": "Rcpp",
      "role": "LinkingTo"
    },
    {
      "package": "Rcpp",
      "version": ">= 0.11.5",
      "role": "Imports"
    },
    {
      "package": "data.table",
      "version": ">= 1.9.6",
      "role": "Imports"
    },
    {
      "package": "Matrix",
      "role": "Imports"
    },
    {
      "package": "methods",
      "role": "Imports"
    },
    {
      "package": "stats",
      "role": "Imports"
    },
    {
      "package": "knitr",
      "role": "Suggests"
    },
    {
      "package": "rmarkdown",
      "role": "Suggests"
    },
    {
      "package": "topicmodels",
      "role": "Suggests"
    },
    {
      "package": "lattice",
      "role": "Suggests"
    },
    {
      "package": "parallel",
      "role": "Suggests"
    }
  ],
  "_owner": "bnosac",
  "_selfowned": true,
  "_usedby": 8,
  "_updates": [
    {
      "week": "2025-35",
      "n": 2
    },
    {
      "week": "2025-36",
      "n": 6
    },
    {
      "week": "2025-48",
      "n": 3
    },
    {
      "week": "2026-05",
      "n": 1
    }
  ],
  "_tags": [
    {
      "name": "0.8.12",
      "date": "2025-09-04"
    },
    {
      "name": "0.8.13",
      "date": "2025-11-26"
    },
    {
      "name": "0.8.14",
      "date": "2025-11-26"
    },
    {
      "name": "0.8.15",
      "date": "2025-11-27"
    },
    {
      "name": "0.8.16",
      "date": "2026-01-30"
    }
  ],
  "_topics": [
    "conll",
    "dependency-parser",
    "lemmatization",
    "natural-language-processing",
    "nlp",
    "pos-tagging",
    "r-pkg",
    "rcpp",
    "text-mining",
    "tokenizer",
    "udpipe",
    "cpp"
  ],
  "_stars": 221,
  "_contributors": [
    {
      "user": "jwijffels",
      "count": 403,
      "uuid": 1710810
    },
    {
      "user": "dselivanov",
      "count": 1,
      "uuid": 5123805
    }
  ],
  "_userbio": {
    "uuid": 20105826,
    "type": "organization",
    "name": "bnosac",
    "description": "open sourced projects"
  },
  "_downloads": {
    "count": 6451,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/udpipe"
  },
  "_mentions": 5,
  "_devurl": "https://github.com/bnosac/udpipe",
  "_searchresults": 1314,
  "_rbuild": "4.6.0",
  "_assets": [
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "extra/NEWS.html",
    "extra/NEWS.txt",
    "extra/readme.html",
    "extra/readme.md",
    "extra/udpipe.html",
    "manual.pdf"
  ],
  "_homeurl": "https://github.com/bnosac/udpipe",
  "_realowner": "bnosac",
  "_cranurl": true,
  "_releases": [
    {
      "version": "0.1.0",
      "date": "2017-09-01"
    },
    {
      "version": "0.1.1",
      "date": "2017-09-13"
    },
    {
      "version": "0.2",
      "date": "2017-11-13"
    },
    {
      "version": "0.2.1",
      "date": "2017-12-06"
    },
    {
      "version": "0.2.2",
      "date": "2017-12-07"
    },
    {
      "version": "0.3",
      "date": "2018-01-15"
    },
    {
      "version": "0.4",
      "date": "2018-02-07"
    },
    {
      "version": "0.5",
      "date": "2018-03-12"
    },
    {
      "version": "0.6",
      "date": "2018-05-14"
    },
    {
      "version": "0.6.1",
      "date": "2018-07-30"
    },
    {
      "version": "0.7",
      "date": "2018-09-10"
    },
    {
      "version": "0.8",
      "date": "2018-12-09"
    },
    {
      "version": "0.8.1",
      "date": "2019-02-15"
    },
    {
      "version": "0.8.2",
      "date": "2019-07-10"
    },
    {
      "version": "0.8.3",
      "date": "2019-07-10"
    },
    {
      "version": "0.8.4",
      "date": "2020-10-10"
    },
    {
      "version": "0.8.4-1",
      "date": "2020-10-12"
    },
    {
      "version": "0.8.5",
      "date": "2020-12-10"
    },
    {
      "version": "0.8.6",
      "date": "2021-06-01"
    },
    {
      "version": "0.8.8",
      "date": "2021-12-02"
    },
    {
      "version": "0.8.9",
      "date": "2022-03-24"
    },
    {
      "version": "0.8.10",
      "date": "2022-11-10"
    },
    {
      "version": "0.8.11",
      "date": "2023-01-06"
    },
    {
      "version": "0.8.12",
      "date": "2025-09-04"
    },
    {
      "version": "0.8.13",
      "date": "2025-11-26"
    },
    {
      "version": "0.8.14",
      "date": "2025-11-26"
    },
    {
      "version": "0.8.15",
      "date": "2025-11-28"
    },
    {
      "version": "0.8.16",
      "date": "2026-01-30"
    }
  ],
  "_exports": [
    "as_conllu",
    "as_cooccurrence",
    "as_fasttext",
    "as_phrasemachine",
    "as_word2vec",
    "cbind_dependencies",
    "cbind_morphological",
    "collocation",
    "cooccurrence",
    "document_term_frequencies",
    "document_term_frequencies_statistics",
    "document_term_matrix",
    "dtm_align",
    "dtm_cbind",
    "dtm_chisq",
    "dtm_colsums",
    "dtm_conform",
    "dtm_cor",
    "dtm_rbind",
    "dtm_remove_lowfreq",
    "dtm_remove_sparseterms",
    "dtm_remove_terms",
    "dtm_remove_tfidf",
    "dtm_reverse",
    "dtm_rowsums",
    "dtm_sample",
    "dtm_svd_similarity",
    "dtm_tfidf",
    "keywords_collocation",
    "keywords_phrases",
    "keywords_rake",
    "paste.data.frame",
    "phrases",
    "strsplit.data.frame",
    "txt_collapse",
    "txt_contains",
    "txt_context",
    "txt_count",
    "txt_freq",
    "txt_grepl",
    "txt_highlight",
    "txt_next",
    "txt_nextgram",
    "txt_overlap",
    "txt_paste",
    "txt_previous",
    "txt_previousgram",
    "txt_recode",
    "txt_recode_ngram",
    "txt_sample",
    "txt_sentiment",
    "txt_show",
    "txt_tagsequence",
    "udpipe",
    "udpipe_accuracy",
    "udpipe_annotate",
    "udpipe_download_model",
    "udpipe_load_model",
    "udpipe_read_conllu",
    "udpipe_train",
    "unique_identifier",
    "unlist_tokens"
  ],
  "_datasets": [
    {
      "name": "brussels_listings",
      "title": "Brussels AirBnB address locations available at www.insideairbnb.com",
      "object": "brussels_listings",
      "file": "brussels_listings.RData",
      "class": [
        "data.frame"
      ],
      "fields": [
        "listing_id",
        "name",
        "host_id",
        "host_name",
        "neighbourhood_group",
        "neighbourhood",
        "latitude",
        "longitude",
        "room_type",
        "price",
        "minimum_nights",
        "number_of_reviews",
        "last_review",
        "reviews_per_month",
        "calculated_host_listings_count",
        "availability_365"
      ],
      "rows": 4903,
      "table": true,
      "tojson": true
    },
    {
      "name": "brussels_reviews",
      "title": "Reviews of AirBnB customers on Brussels address locations available at www.insideairbnb.com",
      "object": "brussels_reviews",
      "file": "brussels_reviews.RData",
      "class": [
        "data.frame"
      ],
      "fields": [
        "id",
        "listing_id",
        "feedback",
        "language"
      ],
      "rows": 1500,
      "table": true,
      "tojson": true
    },
    {
      "name": "brussels_reviews_anno",
      "title": "Reviews of the AirBnB customers which are tokenised, POS tagged and lemmatised",
      "object": "brussels_reviews_anno",
      "file": "brussels_reviews_anno.RData",
      "class": [
        "data.frame"
      ],
      "fields": [
        "doc_id",
        "language",
        "sentence_id",
        "token_id",
        "token",
        "lemma",
        "upos",
        "xpos"
      ],
      "rows": 96143,
      "table": true,
      "tojson": true
    },
    {
      "name": "brussels_reviews_w2v_embeddings_lemma_nl",
      "title": "An example matrix of word embeddings",
      "object": "brussels_reviews_w2v_embeddings_lemma_nl",
      "file": "brussels_reviews_w2v_embeddings_lemma_nl.RData",
      "class": [
        "matrix",
        "array"
      ],
      "fields": {},
      "rows": 2687,
      "table": true,
      "tojson": true
    },
    {
      "name": "udpipe_annotation_params",
      "title": "List with training options set by the UDPipe community when building models based on the Universal Dependencies data",
      "object": "udpipe_annotation_params",
      "file": "udpipe_annotation_params.RData",
      "class": [
        "list"
      ],
      "fields": [],
      "table": false,
      "tojson": true
    }
  ],
  "_help": [
    {
      "page": "as_conllu",
      "title": "Convert a data.frame to CONLL-U format",
      "topics": [
        "as_conllu"
      ]
    },
    {
      "page": "as_cooccurrence",
      "title": "Convert a matrix to a co-occurrence data.frame",
      "topics": [
        "as_cooccurrence"
      ]
    },
    {
      "page": "as_fasttext",
      "title": "Combine labels and text as used in fasttext",
      "topics": [
        "as_fasttext"
      ]
    },
    {
      "page": "as_phrasemachine",
      "title": "Convert Parts of Speech tags to one-letter tags which can be used to identify phrases based on regular expressions",
      "topics": [
        "as_phrasemachine"
      ]
    },
    {
      "page": "as_word2vec",
      "title": "Convert a matrix of word vectors to word2vec format",
      "topics": [
        "as_word2vec"
      ]
    },
    {
      "page": "as.data.frame.udpipe_connlu",
      "title": "Convert the result of udpipe_annotate to a tidy data frame",
      "topics": [
        "as.data.frame.udpipe_connlu"
      ]
    },
    {
      "page": "as.matrix.cooccurrence",
      "title": "Convert the result of cooccurrence to a sparse matrix",
      "topics": [
        "as.matrix.cooccurrence"
      ]
    },
    {
      "page": "brussels_listings",
      "title": "Brussels AirBnB address locations available at www.insideairbnb.com",
      "topics": [
        "brussels_listings"
      ]
    },
    {
      "page": "brussels_reviews",
      "title": "Reviews of AirBnB customers on Brussels address locations available at www.insideairbnb.com",
      "topics": [
        "brussels_reviews"
      ]
    },
    {
      "page": "brussels_reviews_anno",
      "title": "Reviews of the AirBnB customers which are tokenised, POS tagged and lemmatised",
      "topics": [
        "brussels_reviews_anno"
      ]
    },
    {
      "page": "brussels_reviews_w2v_embeddings_lemma_nl",
      "title": "An example matrix of word embeddings",
      "topics": [
        "brussels_reviews_w2v_embeddings_lemma_nl"
      ]
    },
    {
      "page": "cbind_dependencies",
      "title": "Add the dependency parsing information to an annotated dataset",
      "topics": [
        "cbind_dependencies"
      ]
    },
    {
      "page": "cbind_morphological",
      "title": "Add morphological features to an annotated dataset",
      "topics": [
        "cbind_morphological"
      ]
    },
    {
      "page": "cooccurrence",
      "title": "Create a cooccurrence data.frame",
      "topics": [
        "cooccurrence",
        "cooccurrence.character",
        "cooccurrence.cooccurrence",
        "cooccurrence.data.frame"
      ]
    },
    {
      "page": "document_term_frequencies",
      "title": "Aggregate a data.frame to the document/term level by calculating how many times a term occurs per document",
      "topics": [
        "document_term_frequencies",
        "document_term_frequencies.character",
        "document_term_frequencies.data.frame"
      ]
    },
    {
      "page": "document_term_frequencies_statistics",
      "title": "Add Term Frequency, Inverse Document Frequency and Okapi BM25 statistics to the output of document_term_frequencies",
      "topics": [
        "document_term_frequencies_statistics"
      ]
    },
    {
      "page": "document_term_matrix",
      "title": "Create a document/term matrix",
      "topics": [
        "document_term_matrix",
        "document_term_matrix.data.frame",
        "document_term_matrix.default",
        "document_term_matrix.DocumentTermMatrix",
        "document_term_matrix.integer",
        "document_term_matrix.matrix",
        "document_term_matrix.numeric",
        "document_term_matrix.simple_triplet_matrix",
        "document_term_matrix.TermDocumentMatrix"
      ]
    },
    {
      "page": "dtm_align",
      "title": "Reorder a Document-Term-Matrix alongside a vector or data.frame",
      "topics": [
        "dtm_align"
      ]
    },
    {
      "page": "dtm_bind",
      "title": "Combine 2 document term matrices either by rows or by columns",
      "topics": [
        "dtm_bind",
        "dtm_cbind",
        "dtm_rbind"
      ]
    },
    {
      "page": "dtm_chisq",
      "title": "Compare term usage across 2 document groups using the Chi-square Test for Count Data",
      "topics": [
        "dtm_chisq"
      ]
    },
    {
      "page": "dtm_colsums",
      "title": "Column sums and Row sums for document term matrices",
      "topics": [
        "dtm_colsums",
        "dtm_rowsums"
      ]
    },
    {
      "page": "dtm_conform",
      "title": "Make sure a document term matrix has exactly the specified rows and columns",
      "topics": [
        "dtm_conform"
      ]
    },
    {
      "page": "dtm_cor",
      "title": "Pearson Correlation for Sparse Matrices",
      "topics": [
        "dtm_cor"
      ]
    },
    {
      "page": "dtm_remove_lowfreq",
      "title": "Remove terms occurring with low frequency from a Document-Term-Matrix and documents with no terms",
      "topics": [
        "dtm_remove_lowfreq"
      ]
    },
    {
      "page": "dtm_remove_sparseterms",
      "title": "Remove terms with high sparsity from a Document-Term-Matrix",
      "topics": [
        "dtm_remove_sparseterms"
      ]
    },
    {
      "page": "dtm_remove_terms",
      "title": "Remove terms from a Document-Term-Matrix and keep only documents which have at least some terms",
      "topics": [
        "dtm_remove_terms"
      ]
    },
    {
      "page": "dtm_remove_tfidf",
      "title": "Remove terms from a Document-Term-Matrix and documents with no terms based on the term frequency inverse document frequency",
      "topics": [
        "dtm_remove_tfidf"
      ]
    },
    {
      "page": "dtm_reverse",
      "title": "Inverse operation of the document_term_matrix function",
      "topics": [
        "dtm_reverse"
      ]
    },
    {
      "page": "dtm_sample",
      "title": "Random samples and permutations from a Document-Term-Matrix",
      "topics": [
        "dtm_sample"
      ]
    },
    {
      "page": "dtm_svd_similarity",
      "title": "Semantic Similarity to a Singular Value Decomposition",
      "topics": [
        "dtm_svd_similarity"
      ]
    },
    {
      "page": "dtm_tfidf",
      "title": "Term Frequency - Inverse Document Frequency calculation",
      "topics": [
        "dtm_tfidf"
      ]
    },
    {
      "page": "keywords_collocation",
      "title": "Extract collocations - a sequence of terms which follow each other",
      "topics": [
        "collocation",
        "keywords_collocation"
      ]
    },
    {
      "page": "keywords_phrases",
      "title": "Extract phrases - a sequence of terms which follow each other based on a sequence of Parts of Speech tags",
      "topics": [
        "keywords_phrases",
        "phrases"
      ]
    },
    {
      "page": "keywords_rake",
      "title": "Keyword identification using Rapid Automatic Keyword Extraction (RAKE)",
      "topics": [
        "keywords_rake"
      ]
    },
    {
      "page": "paste.data.frame",
      "title": "Concatenate text of each group of data together",
      "topics": [
        "paste.data.frame"
      ]
    },
    {
      "page": "predict.LDA",
      "title": "Predict method for an object of class LDA_VEM or class LDA_Gibbs",
      "topics": [
        "predict.LDA",
        "predict.LDA_Gibbs",
        "predict.LDA_VEM"
      ]
    },
    {
      "page": "strsplit.data.frame",
      "title": "Obtain a tokenised data frame by splitting text alongside a regular expression",
      "topics": [
        "strsplit.data.frame"
      ]
    },
    {
      "page": "syntaxpatterns",
      "title": "Experimental and undocumented querying of syntax patterns",
      "topics": [
        "syntaxpatterns",
        "syntaxpatterns-class"
      ]
    },
    {
      "page": "syntaxrelation",
      "title": "Experimental and undocumented querying of syntax relationships",
      "topics": [
        "&,logical,syntaxrelation-method",
        "&,syntaxrelation,logical-method",
        "syntaxrelation",
        "syntaxrelation-class",
        "|,logical,syntaxrelation-method",
        "|,syntaxrelation,logical-method"
      ]
    },
    {
      "page": "txt_collapse",
      "title": "Collapse a character vector while removing missing data.",
      "topics": [
        "txt_collapse"
      ]
    },
    {
      "page": "txt_contains",
      "title": "Check if text contains a certain pattern",
      "topics": [
        "txt_contains"
      ]
    },
    {
      "page": "txt_context",
      "title": "Based on a vector with a word sequence, get n-grams (looking forward + backward)",
      "topics": [
        "txt_context"
      ]
    },
    {
      "page": "txt_count",
      "title": "Count the number of times a pattern is occurring in text",
      "topics": [
        "txt_count"
      ]
    },
    {
      "page": "txt_freq",
      "title": "Frequency statistics of elements in a vector",
      "topics": [
        "txt_freq"
      ]
    },
    {
      "page": "txt_grepl",
      "title": "Look up multiple patterns and indicate their presence in text",
      "topics": [
        "txt_grepl"
      ]
    },
    {
      "page": "txt_highlight",
      "title": "Highlight words in a character vector",
      "topics": [
        "txt_highlight"
      ]
    },
    {
      "page": "txt_next",
      "title": "Get the n-th next element of a vector",
      "topics": [
        "txt_next"
      ]
    },
    {
      "page": "txt_nextgram",
      "title": "Based on a vector with a word sequence, get n-grams (looking forward)",
      "topics": [
        "txt_nextgram"
      ]
    },
    {
      "page": "txt_overlap",
      "title": "Get the overlap between 2 vectors",
      "topics": [
        "txt_overlap"
      ]
    },
    {
      "page": "txt_paste",
      "title": "Concatenate strings with options how to handle missing data",
      "topics": [
        "txt_paste"
      ]
    },
    {
      "page": "txt_previous",
      "title": "Get the n-th previous element of a vector",
      "topics": [
        "txt_previous"
      ]
    },
    {
      "page": "txt_previousgram",
      "title": "Based on a vector with a word sequence, get n-grams (looking backward)",
      "topics": [
        "txt_previousgram"
      ]
    },
    {
      "page": "txt_recode",
      "title": "Recode text to other categories",
      "topics": [
        "txt_recode"
      ]
    },
    {
      "page": "txt_recode_ngram",
      "title": "Recode words with compound multi-word expressions",
      "topics": [
        "txt_recode_ngram"
      ]
    },
    {
      "page": "txt_sample",
      "title": "Boilerplate function to sample one element from a vector.",
      "topics": [
        "txt_sample"
      ]
    },
    {
      "page": "txt_sentiment",
      "title": "Perform dictionary-based sentiment analysis on a tokenised data frame",
      "topics": [
        "txt_sentiment"
      ]
    },
    {
      "page": "txt_show",
      "title": "Boilerplate function to cat only 1 element of a character vector.",
      "topics": [
        "txt_show"
      ]
    },
    {
      "page": "txt_tagsequence",
      "title": "Identify a contiguous sequence of tags as being 1 entity",
      "topics": [
        "txt_tagsequence"
      ]
    },
    {
      "page": "udpipe",
      "title": "Tokenising, Lemmatising, Tagging and Dependency Parsing of raw text in TIF format",
      "topics": [
        "udpipe"
      ]
    },
    {
      "page": "udpipe_accuracy",
      "title": "Evaluate the accuracy of your UDPipe model on holdout data",
      "topics": [
        "udpipe_accuracy"
      ]
    },
    {
      "page": "udpipe_annotate",
      "title": "Tokenising, Lemmatising, Tagging and Dependency Parsing Annotation of raw text",
      "topics": [
        "udpipe_annotate"
      ]
    },
    {
      "page": "udpipe_annotation_params",
      "title": "List with training options set by the UDPipe community when building models based on the Universal Dependencies data",
      "topics": [
        "udpipe_annotation_params"
      ]
    },
    {
      "page": "udpipe_download_model",
      "title": "Download a UDPipe model provided by the UDPipe community for a specific language of choice",
      "topics": [
        "udpipe_download_model"
      ]
    },
    {
      "page": "udpipe_load_model",
      "title": "Load a UDPipe model",
      "topics": [
        "udpipe_load_model"
      ]
    },
    {
      "page": "udpipe_read_conllu",
      "title": "Read in a CONLL-U file as a data.frame",
      "topics": [
        "udpipe_read_conllu"
      ]
    },
    {
      "page": "udpipe_train",
      "title": "Train a UDPipe model",
      "topics": [
        "udpipe_train"
      ]
    },
    {
      "page": "unique_identifier",
      "title": "Create a unique identifier for each combination of fields in a data frame",
      "topics": [
        "unique_identifier"
      ]
    },
    {
      "page": "unlist_tokens",
      "title": "Create a data.frame from a list of tokens",
      "topics": [
        "unlist_tokens"
      ]
    }
  ],
  "_readme": "https://github.com/bnosac/udpipe/raw/HEAD/README.md",
  "_rundeps": [
    "data.table",
    "lattice",
    "Matrix",
    "Rcpp"
  ],
  "_sysdeps": [
    {
      "shlib": "libstdc++",
      "package": "libstdc++6",
      "source": "gcc",
      "version": "14.2.0-4ubuntu2~24.04.1",
      "name": "c++",
      "homepage": "http://gcc.gnu.org/",
      "description": "GNU Standard C++ Library v3"
    }
  ],
  "_vignettes": [
    {
      "source": "udpipe-usecase-postagging-lemmatisation.Rmd",
      "filename": "udpipe-usecase-postagging-lemmatisation.html",
      "title": "UDPipe Natural Language Processing - Basic Analytical Use Cases",
      "author": "Jan Wijffels",
      "engine": "knitr::rmarkdown",
      "headings": [
        "UDPipe - Basic Analytics",
        "Start with annotating",
        "Basic frequency statistics",
        "Finding keywords",
        "Co-occurrences",
        "Nouns / adjectives used in same sentence",
        "Nouns / adjectives which follow one another",
        "Correlations",
        "Support in text mining"
      ],
      "created": "2018-02-06 13:15:19",
      "modified": "2021-06-01 11:55:09",
      "commits": 8
    },
    {
      "source": "udpipe-train.Rmd",
      "filename": "udpipe-train.html",
      "title": "UDPipe Natural Language Processing - Model Building",
      "author": "Jan Wijffels",
      "engine": "knitr::rmarkdown",
      "headings": [
        "General",
        "Model building",
        "Basic example",
        "Providing more details on the model annotation process",
        "Settings for the tokenizer:",
        "Settings for the tagger:",
        "Settings for the dependency parser:",
        "Example",
        "Example on UD 2.6 on German GSD",
        "Support in text mining"
      ],
      "created": "2017-08-31 22:16:04",
      "modified": "2021-06-01 11:55:09",
      "commits": 16
    },
    {
      "source": "udpipe-parallel.Rmd",
      "filename": "udpipe-parallel.html",
      "title": "UDPipe Natural Language Processing - Parallel",
      "author": "Jan Wijffels",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Parallel Annotation",
        "When to run in parallel?",
        "Using other packages allowing parallel computation"
      ],
      "created": "2019-05-17 21:21:27",
      "modified": "2021-06-01 11:55:09",
      "commits": 5
    },
    {
      "source": "udpipe-annotation.Rmd",
      "filename": "udpipe-annotation.html",
      "title": "UDPipe Natural Language Processing - Text Annotation",
      "author": "Jan Wijffels",
      "engine": "knitr::rmarkdown",
      "headings": [
        "UDPipe - General",
        "udpipe the R package",
        "UDPipe the C++ library",
        "udpipe models",
        "Pre-trained models",
        "Build your own annotator models",
        "Annotate text",
        "Load the model",
        "Annotate your text",
        "Tokenisation, tagging and parsing",
        "Only part of the annotation",
        "My text data is already tokenised",
        "Remarks",
        "Support in text mining"
      ],
      "created": "2017-08-30 22:16:52",
      "modified": "2025-09-04 09:46:29",
      "commits": 19
    },
    {
      "source": "udpipe-usecase-topicmodelling.Rmd",
      "filename": "udpipe-usecase-topicmodelling.html",
      "title": "UDPipe Natural Language Processing - Topic Modelling Use Cases",
      "author": "Jan Wijffels",
      "engine": "knitr::rmarkdown",
      "headings": [
        "UDPipe for Topic Modelling",
        "Start with annotating",
        "Basic topic modelling",
        "Topic modelling only on specific POS tags",
        "Predict new documents",
        "Interpret topics",
        "Doing the same on the adjectives + nouns",
        "Topic visualisation",
        "Include keywords in topic models",
        "Other option to build document term matricis",
        "Summarising topics",
        "Support in text mining"
      ],
      "created": "2018-03-06 13:13:42",
      "modified": "2021-06-01 11:55:09",
      "commits": 8
    },
    {
      "source": "udpipe-tryitout.Rmd",
      "filename": "udpipe-tryitout.html",
      "title": "UDPipe Natural Language Processing - Try it out",
      "author": "Jan Wijffels",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Example",
        "A small note on encodings"
      ],
      "created": "2018-01-15 10:15:26",
      "modified": "2020-10-09 08:03:27",
      "commits": 9
    },
    {
      "source": "udpipe-universe.Rmd",
      "filename": "udpipe-universe.html",
      "title": "UDPipe Natural Language Processing - Universe",
      "author": "Jan Wijffels",
      "engine": "knitr::rmarkdown",
      "headings": [
        "udpipe universe"
      ],
      "created": "2020-10-09 09:04:30",
      "modified": "2021-12-02 18:48:12",
      "commits": 4
    }
  ],
  "_score": 12.497923962528786,
  "_indexed": true,
  "_nocasepkg": "udpipe",
  "_universes": [
    "bnosac",
    "jwijffels"
  ],
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "0.8.16",
      "date": "2026-05-18T09:28:26.000Z",
      "distro": "noble",
      "arch": "aarch64",
      "commit": "3e8572cd7e6ee58b22cc30d29210a5f656b3b535",
      "fileid": "4fdbd0db0518fb1c3f2b807963dca349ee505015c0b65774e48985a68f3646d9",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/bnosac/actions/runs/26024662138"
    },
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "0.8.16",
      "date": "2026-05-18T09:29:04.000Z",
      "distro": "noble",
      "arch": "x86_64",
      "commit": "3e8572cd7e6ee58b22cc30d29210a5f656b3b535",
      "fileid": "1778840b7f8adbf40d99e11dffc0356ee67de4afc1c04289c89b8dd28a75b894",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/bnosac/actions/runs/26024662138"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "0.8.16",
      "date": "2026-05-18T09:28:29.000Z",
      "distro": "noble",
      "arch": "aarch64",
      "commit": "3e8572cd7e6ee58b22cc30d29210a5f656b3b535",
      "fileid": "7233ab9b41d20fc14a06b75a767b0207dbdd412a51aea7bfe841651eba9cd763",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/bnosac/actions/runs/26024662138"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "0.8.16",
      "date": "2026-05-18T09:28:27.000Z",
      "distro": "noble",
      "arch": "x86_64",
      "commit": "3e8572cd7e6ee58b22cc30d29210a5f656b3b535",
      "fileid": "713e323c584d316206ee0ab88ebcbcf01f21de039c6b3bf08e3d3082896225b1",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/bnosac/actions/runs/26024662138"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "0.8.16",
      "date": "2026-05-18T09:27:56.000Z",
      "arch": "aarch64",
      "commit": "3e8572cd7e6ee58b22cc30d29210a5f656b3b535",
      "fileid": "7b14ef1a5ec4e4d03b8d8de9ae4cff79fdc8ccc8b45da53ae245356f1256d030",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/bnosac/actions/runs/26024662138"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "0.8.16",
      "date": "2026-05-18T09:29:02.000Z",
      "arch": "x86_64",
      "commit": "3e8572cd7e6ee58b22cc30d29210a5f656b3b535",
      "fileid": "8f65abc4d931cde71c2a2d512bd7d42db54871394a7b187a057be9f6cb99cb4f",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/bnosac/actions/runs/26024662138"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "0.8.16",
      "date": "2026-05-18T09:28:11.000Z",
      "arch": "aarch64",
      "commit": "3e8572cd7e6ee58b22cc30d29210a5f656b3b535",
      "fileid": "7592b9e36d5bf593317752c4209f08e8916f674b617dfc5f32e618800f181350",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/bnosac/actions/runs/26024662138"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "0.8.16",
      "date": "2026-05-18T09:30:12.000Z",
      "arch": "x86_64",
      "commit": "3e8572cd7e6ee58b22cc30d29210a5f656b3b535",
      "fileid": "bf32007baec0ccb73b628807fbc941422215b82f7228e07741ee4d2d8a5971eb",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/bnosac/actions/runs/26024662138"
    },
    {
      "r": "4.7.0",
      "os": "win",
      "version": "0.8.16",
      "date": "2026-05-18T09:28:07.000Z",
      "arch": "x86_64",
      "commit": "3e8572cd7e6ee58b22cc30d29210a5f656b3b535",
      "fileid": "9fa82002a54f237f87db1fb0aed19a009514dd8de3bb38309666cb6959ee22cc",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/bnosac/actions/runs/26024662138"
    },
    {
      "r": "4.5.3",
      "os": "win",
      "version": "0.8.16",
      "date": "2026-05-18T09:27:52.000Z",
      "arch": "x86_64",
      "commit": "3e8572cd7e6ee58b22cc30d29210a5f656b3b535",
      "fileid": "d42ccc55438d67f0fa8fa07ba4545cd92c709f1bfb1b447d816d006ecc58e14e",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/bnosac/actions/runs/26024662138"
    },
    {
      "r": "4.6.0",
      "os": "win",
      "version": "0.8.16",
      "date": "2026-05-18T09:27:51.000Z",
      "arch": "x86_64",
      "commit": "3e8572cd7e6ee58b22cc30d29210a5f656b3b535",
      "fileid": "deb80b9f6e8282947c14dcfbcf5d9645d160cfc6fcdbd8a06effd59689c30919",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/bnosac/actions/runs/26024662138"
    },
    {
      "r": "4.6.0",
      "os": "wasm",
      "version": "0.8.16",
      "date": "2026-06-02T16:02:50.000Z",
      "arch": "emscripten",
      "commit": "3e8572cd7e6ee58b22cc30d29210a5f656b3b535",
      "fileid": "8592388f733cdf56411c57803f78d11399c2e6ad4bd0070c4544c777eab4f765",
      "status": "success",
      "buildurl": "https://github.com/r-universe/bnosac/actions/runs/26024662138"
    }
  ]
}