{
  "_id": "6a104629acfb0bcc41c9ea6e",
  "Package": "corpustools",
  "Version": "0.5.2",
  "Date": "2025-07-07",
  "Title": "Managing, Querying and Analyzing Tokenized Text",
  "Description": "Provides text analysis in R, focusing on the use of a\ntokenized text format. In this format, the positions of tokens\nare maintained, and each token can be annotated (e.g.,\npart-of-speech tags, dependency relations). Prominent features\ninclude advanced Lucene-like querying for specific tokens or\ncontexts (e.g., documents, sentences), similarity statistics\nfor words and documents, exporting to DTM for compatibility\nwith many text analysis packages, and the possibility to\nreconstruct original text from tokens to facilitate\ninterpretation.",
  "Authors@R": "c(person(given = \"Kasper\",\nfamily = \"Welbers\",\nrole = c(\"aut\", \"cre\"),\nemail = \"kasperwelbers@gmail.com\"),\nperson(given = \"Wouter\",\nfamily = \"van Atteveldt\",\nrole = \"aut\"))",
  "Maintainer": "Kasper Welbers <kasperwelbers@gmail.com>",
  "LazyData": "true",
  "Encoding": "UTF-8",
  "License": "GPL-3",
  "URL": "https://github.com/kasperwelbers/corpustools",
  "RoxygenNote": "7.3.2",
  "VignetteBuilder": "knitr",
  "Config/pak/sysreqs": "libglpk-dev libicu-dev libpng-dev libxml2-dev",
  "Repository": "https://kasperwelbers.r-universe.dev",
  "Date/Publication": "2025-07-10 09:09:04 UTC",
  "RemoteUrl": "https://github.com/kasperwelbers/corpustools",
  "RemoteRef": "HEAD",
  "RemoteSha": "cf98223c175e39b65e15a50e65675f8407ffc452",
  "NeedsCompilation": "yes",
  "Packaged": {
    "Date": "2026-05-10 08:32:48 UTC",
    "User": "root"
  },
  "Author": "Kasper Welbers [aut, cre],\nWouter van Atteveldt [aut]",
  "MD5sum": "90cae844995d519b8c89924252821d66",
  "_user": "kasperwelbers",
  "_type": "src",
  "_file": "corpustools_0.5.2.tar.gz",
  "_fileid": "c31b933ab9cc7820837ad2648eb6bbf4d99d8b256dfa75ed806b4a584e204173",
  "_filesize": 1362384,
  "_sha256": "c31b933ab9cc7820837ad2648eb6bbf4d99d8b256dfa75ed806b4a584e204173",
  "_created": "2026-05-10T08:32:48.000Z",
  "_published": "2026-05-22T12:03:53.413Z",
  "_distro": "noble",
  "_jobs": [
    {
      "job": 77375550550,
      "time": 192,
      "config": "linux-devel-arm64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "6902350863"
    },
    {
      "job": 77375550545,
      "time": 202,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "6902352457"
    },
    {
      "job": 77375551386,
      "time": 186,
      "config": "linux-release-arm64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6902350292"
    },
    {
      "job": 77375550939,
      "time": 202,
      "config": "linux-release-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6902351862"
    },
    {
      "job": 77375551093,
      "time": 145,
      "config": "macos-oldrel-arm64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "6902418433"
    },
    {
      "job": 77375550961,
      "time": 355,
      "config": "macos-oldrel-x86_64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "6902443053"
    },
    {
      "job": 77375550667,
      "time": 167,
      "config": "macos-release-arm64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6911058322"
    },
    {
      "job": 77375550784,
      "time": 259,
      "config": "macos-release-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6902460421"
    },
    {
      "job": 77375550521,
      "time": 320,
      "config": "source",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6902331908"
    },
    {
      "job": 77375550121,
      "time": 151,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7160019367"
    },
    {
      "job": 77375550572,
      "time": 229,
      "config": "windows-devel",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "6902354598"
    },
    {
      "job": 77375550717,
      "time": 163,
      "config": "windows-oldrel",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "6902348019"
    },
    {
      "job": 77375550792,
      "time": 215,
      "config": "windows-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6902353086"
    }
  ],
  "_buildurl": "https://github.com/r-universe/kasperwelbers/actions/runs/25624039511",
  "_status": "success",
  "_host": "GitHub-Actions",
  "_upstream": "https://github.com/kasperwelbers/corpustools",
  "_commit": {
    "id": "cf98223c175e39b65e15a50e65675f8407ffc452",
    "author": "Kasper Welbers <kasperwelbers@gmail.com>",
    "committer": "Kasper Welbers <kasperwelbers@gmail.com>",
    "message": "updated igraph\n",
    "time": 1752138544
  },
  "_maintainer": {
    "name": "Kasper Welbers",
    "email": "kasperwelbers@gmail.com",
    "login": "kasperwelbers",
    "twitter": "@KasperWelbers",
    "uuid": 6179240
  },
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 3.5.0",
      "role": "Depends"
    },
    {
      "package": "Rcpp",
      "role": "LinkingTo"
    },
    {
      "package": "RcppProgress",
      "role": "LinkingTo"
    },
    {
      "package": "methods",
      "role": "Imports"
    },
    {
      "package": "wordcloud",
      "version": ">= 2.5",
      "role": "Imports"
    },
    {
      "package": "stringi",
      "role": "Imports"
    },
    {
      "package": "Rcpp",
      "version": ">= 0.12.12",
      "role": "Imports"
    },
    {
      "package": "R6",
      "role": "Imports"
    },
    {
      "package": "udpipe",
      "version": ">= 0.8.3",
      "role": "Imports"
    },
    {
      "package": "digest",
      "role": "Imports"
    },
    {
      "package": "data.table",
      "version": ">= 1.10.4",
      "role": "Imports"
    },
    {
      "package": "quanteda",
      "version": ">= 1.5.1",
      "role": "Imports"
    },
    {
      "package": "igraph",
      "role": "Imports"
    },
    {
      "package": "tokenbrowser",
      "version": ">= 0.1.5",
      "role": "Imports"
    },
    {
      "package": "RNewsflow",
      "version": ">= 1.2.1",
      "role": "Imports"
    },
    {
      "package": "Matrix",
      "version": ">= 1.2",
      "role": "Imports"
    },
    {
      "package": "parallel",
      "role": "Imports"
    },
    {
      "package": "pbapply",
      "version": ">= 1.4",
      "role": "Imports"
    },
    {
      "package": "rsyntax",
      "version": ">= 0.1.1",
      "role": "Imports"
    },
    {
      "package": "testthat",
      "role": "Suggests"
    },
    {
      "package": "tm",
      "version": ">= 0.6",
      "role": "Suggests"
    },
    {
      "package": "topicmodels",
      "role": "Suggests"
    },
    {
      "package": "knitr",
      "role": "Suggests"
    },
    {
      "package": "rmarkdown",
      "role": "Suggests"
    }
  ],
  "_owner": "kasperwelbers",
  "_selfowned": true,
  "_usedby": 1,
  "_updates": [
    {
      "week": "2025-28",
      "n": 2
    }
  ],
  "_tags": [],
  "_stars": 32,
  "_contributors": [
    {
      "user": "kasperwelbers",
      "count": 179,
      "uuid": 6179240
    },
    {
      "user": "vanatteveldt",
      "count": 4,
      "uuid": 1736240
    },
    {
      "user": "schochastics",
      "count": 1,
      "uuid": 17147355
    }
  ],
  "_userbio": {
    "uuid": 6179240,
    "type": "user",
    "name": "Kasper Welbers",
    "description": "Associate Professor at VU University Amsterdam, department of Communication Science. Mostly work on research related stuff in R, Typescript and Python."
  },
  "_downloads": {
    "count": 535,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/corpustools"
  },
  "_devurl": "https://github.com/kasperwelbers/corpustools",
  "_searchresults": 184,
  "_topics": [
    "cpp"
  ],
  "_rbuild": "4.6.0",
  "_assets": [
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "extra/corpustools.html",
    "extra/NEWS.html",
    "extra/NEWS.txt",
    "extra/readme.html",
    "extra/readme.md",
    "manual.pdf"
  ],
  "_homeurl": "https://github.com/kasperwelbers/corpustools",
  "_realowner": "kasperwelbers",
  "_cranurl": true,
  "_releases": [
    {
      "version": "0.3",
      "date": "2017-10-03"
    },
    {
      "version": "0.3.1",
      "date": "2017-12-13"
    },
    {
      "version": "0.3.3",
      "date": "2018-04-20"
    },
    {
      "version": "0.4.1",
      "date": "2019-11-20"
    },
    {
      "version": "0.4.2",
      "date": "2020-01-23"
    },
    {
      "version": "0.4.4",
      "date": "2021-01-07"
    },
    {
      "version": "0.4.5",
      "date": "2021-01-13"
    },
    {
      "version": "0.4.6",
      "date": "2021-02-03"
    },
    {
      "version": "0.4.7",
      "date": "2021-02-28"
    },
    {
      "version": "0.4.8",
      "date": "2021-06-25"
    },
    {
      "version": "0.4.9",
      "date": "2022-01-23"
    },
    {
      "version": "0.4.10",
      "date": "2022-05-11"
    },
    {
      "version": "0.5.1",
      "date": "2023-05-08"
    },
    {
      "version": "0.5.2",
      "date": "2025-07-07"
    }
  ],
  "_exports": [
    "agg_label",
    "agg_tcorpus",
    "aggregate_rsyntax",
    "as.tcorpus",
    "backbone_filter",
    "browse_hits",
    "browse_texts",
    "compare_corpus",
    "compare_documents",
    "compare_subset",
    "count_tcorpus",
    "create_tcorpus",
    "docfreq_filter",
    "dtm_wordcloud",
    "ego_semnet",
    "export_span_annotations",
    "feature_associations",
    "feature_stats",
    "fold_rsyntax",
    "freq_filter",
    "get_dfm",
    "get_dtm",
    "get_kwic",
    "get_stopwords",
    "laplace",
    "melt_quanteda_dict",
    "merge_tcorpora",
    "plot_semnet",
    "plot_words",
    "preprocess_tokens",
    "refresh_tcorpus",
    "search_contexts",
    "search_dictionary",
    "search_features",
    "semnet",
    "semnet_window",
    "set_network_attributes",
    "show_udpipe_models",
    "subset_query",
    "tc_plot_tree",
    "tCorpus",
    "tokens_to_tcorpus",
    "top_features",
    "transform_rsyntax",
    "udpipe_clause_tqueries",
    "udpipe_quote_tqueries",
    "udpipe_simplify",
    "udpipe_spanquote_tqueries",
    "udpipe_tcorpus",
    "untokenize"
  ],
  "_datasets": [
    {
      "name": "corenlp_tokens",
      "title": "coreNLP example sentences",
      "object": "corenlp_tokens",
      "class": [
        "data.frame"
      ],
      "fields": [
        "doc_id",
        "sentence",
        "id",
        "token",
        "lemma",
        "CharacterOffsetBegin",
        "CharacterOffsetEnd",
        "POS",
        "NER",
        "Speaker",
        "parent",
        "relation",
        "pos1"
      ],
      "rows": 36,
      "table": true,
      "tojson": true
    },
    {
      "name": "sotu_texts",
      "title": "State of the Union addresses",
      "object": "sotu_texts",
      "class": [
        "data.frame"
      ],
      "fields": [
        "id",
        "date",
        "party",
        "text",
        "president"
      ],
      "rows": 1090,
      "table": true,
      "tojson": true
    },
    {
      "name": "stopwords_list",
      "title": "Basic stopword lists",
      "object": "stopwords_list",
      "class": [
        "list"
      ],
      "fields": [],
      "table": false,
      "tojson": true
    },
    {
      "name": "tc_sotu_udpipe",
      "title": "A tCorpus with a small sample of sotu paragraphs parsed with udpipe",
      "object": "tc_sotu_udpipe",
      "class": [
        "tCorpus",
        "R6"
      ],
      "fields": [],
      "table": false,
      "tojson": false
    }
  ],
  "_help": [
    {
      "page": "add_multitoken_label",
      "title": "Choose and add multitoken strings based on multitoken categories",
      "topics": [
        "add_multitoken_label"
      ]
    },
    {
      "page": "agg_label",
      "title": "Helper function for aggregate_rsyntax",
      "topics": [
        "agg_label"
      ]
    },
    {
      "page": "agg_tcorpus",
      "title": "Aggregate the tokens data",
      "topics": [
        "agg_tcorpus"
      ]
    },
    {
      "page": "aggregate_rsyntax",
      "title": "Aggregate rsyntax annotations",
      "topics": [
        "aggregate_rsyntax"
      ]
    },
    {
      "page": "as.tcorpus",
      "title": "Force an object to be a tCorpus class",
      "topics": [
        "as.tcorpus"
      ]
    },
    {
      "page": "as.tcorpus.default",
      "title": "Force an object to be a tCorpus class",
      "topics": [
        "as.tcorpus.default"
      ]
    },
    {
      "page": "as.tcorpus.tCorpus",
      "title": "Force an object to be a tCorpus class",
      "topics": [
        "as.tcorpus.tCorpus"
      ]
    },
    {
      "page": "backbone_filter",
      "title": "Extract the backbone of a network.",
      "topics": [
        "backbone_filter"
      ]
    },
    {
      "page": "browse_hits",
      "title": "View hits in a browser",
      "topics": [
        "browse_hits"
      ]
    },
    {
      "page": "browse_texts",
      "title": "Create and view a full text browser",
      "topics": [
        "browse_texts"
      ]
    },
    {
      "page": "calc_chi2",
      "title": "Vectorized computation of chi^2 statistic for a 2x2 crosstab containing the values [a, b] [c, d]",
      "topics": [
        "calc_chi2"
      ]
    },
    {
      "page": "compare_corpus",
      "title": "Compare tCorpus vocabulary to that of another (reference) tCorpus",
      "topics": [
        "compare_corpus"
      ]
    },
    {
      "page": "compare_documents",
      "title": "Calculate the similarity of documents",
      "topics": [
        "compare_documents"
      ]
    },
    {
      "page": "compare_subset",
      "title": "Compare vocabulary of a subset of a tCorpus to the rest of the tCorpus",
      "topics": [
        "compare_subset"
      ]
    },
    {
      "page": "corenlp_tokens",
      "title": "coreNLP example sentences",
      "topics": [
        "corenlp_tokens"
      ]
    },
    {
      "page": "count_tcorpus",
      "title": "Count results of search hits, or of a given feature in tokens",
      "topics": [
        "count_tcorpus"
      ]
    },
    {
      "page": "create_tcorpus",
      "title": "Create a tCorpus",
      "topics": [
        "create_tcorpus",
        "create_tcorpus.character",
        "create_tcorpus.corpus",
        "create_tcorpus.data.frame",
        "create_tcorpus.factor"
      ]
    },
    {
      "page": "docfreq_filter",
      "title": "Support function for subset method",
      "topics": [
        "docfreq_filter"
      ]
    },
    {
      "page": "dtm_compare",
      "title": "Compare two document term matrices",
      "topics": [
        "dtm_compare"
      ]
    },
    {
      "page": "dtm_wordcloud",
      "title": "Plot a word cloud from a dtm",
      "topics": [
        "dtm_wordcloud"
      ]
    },
    {
      "page": "ego_semnet",
      "title": "Create an ego network",
      "topics": [
        "ego_semnet"
      ]
    },
    {
      "page": "export_span_annotations",
      "title": "Export span annotations",
      "topics": [
        "export_span_annotations"
      ]
    },
    {
      "page": "feature_associations",
      "title": "Get common nearby features given a query or query hits",
      "topics": [
        "feature_associations"
      ]
    },
    {
      "page": "feature_stats",
      "title": "Feature statistics",
      "topics": [
        "feature_stats"
      ]
    },
    {
      "page": "fold_rsyntax",
      "title": "Fold rsyntax annotations",
      "topics": [
        "fold_rsyntax"
      ]
    },
    {
      "page": "freq_filter",
      "title": "Support function for subset method",
      "topics": [
        "freq_filter"
      ]
    },
    {
      "page": "get_dtm",
      "title": "Create a document term matrix.",
      "topics": [
        "get_dfm",
        "get_dtm"
      ]
    },
    {
      "page": "get_global_i",
      "title": "Compute global feature positions",
      "topics": [
        "get_global_i"
      ]
    },
    {
      "page": "get_kwic",
      "title": "Get keyword-in-context (KWIC) strings",
      "topics": [
        "get_kwic"
      ]
    },
    {
      "page": "get_stopwords",
      "title": "Get a character vector of stopwords",
      "topics": [
        "get_stopwords"
      ]
    },
    {
      "page": "laplace",
      "title": "Laplace (i.e. add constant) smoothing",
      "topics": [
        "laplace"
      ]
    },
    {
      "page": "melt_quanteda_dict",
      "title": "Convert a quanteda dictionary to a long data.table format",
      "topics": [
        "melt_quanteda_dict"
      ]
    },
    {
      "page": "merge_tcorpora",
      "title": "Merge tCorpus objects",
      "topics": [
        "merge_tcorpora"
      ]
    },
    {
      "page": "plot_semnet",
      "title": "Visualize a semnet network",
      "topics": [
        "plot_semnet"
      ]
    },
    {
      "page": "plot_words",
      "title": "Plot a wordcloud with words ordered and coloured according to a dimension (x)",
      "topics": [
        "plot_words"
      ]
    },
    {
      "page": "plot.contextHits",
      "title": "S3 plot for contextHits class",
      "topics": [
        "plot.contextHits"
      ]
    },
    {
      "page": "plot.featureAssociations",
      "title": "visualize feature associations",
      "topics": [
        "plot.featureAssociations"
      ]
    },
    {
      "page": "plot.featureHits",
      "title": "S3 plot for featureHits class",
      "topics": [
        "plot.featureHits"
      ]
    },
    {
      "page": "plot.vocabularyComparison",
      "title": "visualize vocabularyComparison",
      "topics": [
        "plot.vocabularyComparison"
      ]
    },
    {
      "page": "preprocess_tokens",
      "title": "Preprocess tokens in a character vector",
      "topics": [
        "preprocess_tokens"
      ]
    },
    {
      "page": "print.contextHits",
      "title": "S3 print for contextHits class",
      "topics": [
        "print.contextHits"
      ]
    },
    {
      "page": "print.featureHits",
      "title": "S3 print for featureHits class",
      "topics": [
        "print.featureHits"
      ]
    },
    {
      "page": "print.tCorpus",
      "title": "S3 print for tCorpus class",
      "topics": [
        "print.tCorpus"
      ]
    },
    {
      "page": "refresh_tcorpus",
      "title": "Refresh a tCorpus object using the current version of corpustools",
      "topics": [
        "refresh_tcorpus"
      ]
    },
    {
      "page": "require_package",
      "title": "Check if package with given version exists",
      "topics": [
        "require_package"
      ]
    },
    {
      "page": "search_contexts",
      "title": "Search for documents or sentences using Boolean queries",
      "topics": [
        "search_contexts"
      ]
    },
    {
      "page": "search_dictionary",
      "title": "Dictionary lookup",
      "topics": [
        "search_dictionary"
      ]
    },
    {
      "page": "search_features",
      "title": "Find tokens using a Lucene-like search query",
      "topics": [
        "search_features"
      ]
    },
    {
      "page": "semnet",
      "title": "Create a semantic network based on the co-occurence of tokens in documents",
      "topics": [
        "semnet"
      ]
    },
    {
      "page": "semnet_window",
      "title": "Create a semantic network based on the co-occurence of tokens in token windows",
      "topics": [
        "semnet_window"
      ]
    },
    {
      "page": "set_network_attributes",
      "title": "Set some default network attributes for pretty plotting",
      "topics": [
        "set_network_attributes"
      ]
    },
    {
      "page": "sgt",
      "title": "Simple Good Turing smoothing",
      "topics": [
        "sgt"
      ]
    },
    {
      "page": "show_udpipe_models",
      "title": "Show the names of udpipe models",
      "topics": [
        "show_udpipe_models"
      ]
    },
    {
      "page": "sotu_texts",
      "title": "State of the Union addresses",
      "topics": [
        "sotu_texts"
      ]
    },
    {
      "page": "stopwords_list",
      "title": "Basic stopword lists",
      "topics": [
        "stopwords_list"
      ]
    },
    {
      "page": "subset_query",
      "title": "Subset tCorpus token data using a query",
      "topics": [
        "subset_query"
      ]
    },
    {
      "page": "subset.tCorpus",
      "title": "S3 subset for tCorpus class",
      "topics": [
        "subset.tCorpus"
      ]
    },
    {
      "page": "summary.contextHits",
      "title": "S3 summary for contextHits class",
      "topics": [
        "summary.contextHits"
      ]
    },
    {
      "page": "summary.featureHits",
      "title": "S3 summary for featureHits class",
      "topics": [
        "summary.featureHits"
      ]
    },
    {
      "page": "summary.tCorpus",
      "title": "Summary of a tCorpus object",
      "topics": [
        "summary.tCorpus"
      ]
    },
    {
      "page": "tc_plot_tree",
      "title": "Visualize a dependency tree",
      "topics": [
        "tc_plot_tree"
      ]
    },
    {
      "page": "tc_sotu_udpipe",
      "title": "A tCorpus with a small sample of sotu paragraphs parsed with udpipe",
      "topics": [
        "tc_sotu_udpipe"
      ]
    },
    {
      "page": "tCorpus",
      "title": "tCorpus: a corpus class for tokenized texts",
      "topics": [
        "tCorpus",
        "tcorpus"
      ]
    },
    {
      "page": "tCorpus_compare",
      "title": "Corpus comparison",
      "topics": [
        "tCorpus_compare"
      ]
    },
    {
      "page": "tCorpus_create",
      "title": "Creating a tCorpus",
      "topics": [
        "tCorpus_create"
      ]
    },
    {
      "page": "tCorpus_data",
      "title": "Methods and functions for viewing, modifying and subsetting tCorpus data",
      "topics": [
        "tCorpus_data"
      ]
    },
    {
      "page": "tCorpus_docsim",
      "title": "Document similarity",
      "topics": [
        "tCorpus_docsim"
      ]
    },
    {
      "page": "tCorpus_features",
      "title": "Preprocessing, subsetting and analyzing features",
      "topics": [
        "tCorpus_features"
      ]
    },
    {
      "page": "tCorpus_modify_by_reference",
      "title": "Modify tCorpus by reference",
      "topics": [
        "tCorpus_modify_by_reference"
      ]
    },
    {
      "page": "tCorpus_querying",
      "title": "Use Boolean queries to analyze the tCorpus",
      "topics": [
        "tCorpus_querying"
      ]
    },
    {
      "page": "tCorpus_semnet",
      "title": "Feature co-occurrence based semantic network analysis",
      "topics": [
        "tCorpus_semnet"
      ]
    },
    {
      "page": "tCorpus_topmod",
      "title": "Topic modeling",
      "topics": [
        "tCorpus_topmod"
      ]
    },
    {
      "page": "tCorpus-cash-annotate_rsyntax",
      "title": "Annotate tokens based on rsyntax queries",
      "topics": [
        "annotate_rsyntax",
        "tCorpus$annotate_rsyntax"
      ]
    },
    {
      "page": "tCorpus-cash-code_dictionary",
      "title": "Dictionary lookup",
      "topics": [
        "code_dictionary",
        "tCorpus$code_dictionary"
      ]
    },
    {
      "page": "tCorpus-cash-code_features",
      "title": "Code features in a tCorpus based on a search string",
      "topics": [
        "code_features",
        "tCorpus$code_features"
      ]
    },
    {
      "page": "tCorpus-cash-context",
      "title": "Get a context vector",
      "topics": [
        "context",
        "tCorpus$context"
      ]
    },
    {
      "page": "tCorpus-cash-deduplicate",
      "title": "Deduplicate documents",
      "topics": [
        "deduplicate",
        "tCorpus$deduplicate"
      ]
    },
    {
      "page": "tCorpus-cash-delete_columns",
      "title": "Delete column from the data and meta data",
      "topics": [
        "delete_columns",
        "delete_meta_columns",
        "tCorpus$delete_columns",
        "tCorpus$delete_meta_columns"
      ]
    },
    {
      "page": "tCorpus-cash-feats_to_columns",
      "title": "Cast the \"feats\" column in UDpipe tokens to columns",
      "topics": [
        "feats_to_columms",
        "tCorpus$feats_to_columns"
      ]
    },
    {
      "page": "tCorpus-cash-feature_subset",
      "title": "Filter features",
      "topics": [
        "feature_subset",
        "tCorpus$feature_subset"
      ]
    },
    {
      "page": "tCorpus-cash-fold_rsyntax",
      "title": "Fold rsyntax annotations",
      "topics": [
        "tCorpus$fold_rsyntax"
      ]
    },
    {
      "page": "tCorpus-cash-get",
      "title": "Access the data from a tCorpus",
      "topics": [
        "get",
        "get_meta",
        "tCorpus$get",
        "tCorpus$get_meta"
      ]
    },
    {
      "page": "tCorpus-cash-lda_fit",
      "title": "Estimate a LDA topic model",
      "topics": [
        "lda_fit",
        "tCorpus$lda_fit"
      ]
    },
    {
      "page": "tCorpus-cash-merge",
      "title": "Merge the token and meta data.tables of a tCorpus with another data.frame",
      "topics": [
        "merge",
        "merge_meta",
        "tCorpus$merge"
      ]
    },
    {
      "page": "tCorpus-cash-preprocess",
      "title": "Preprocess feature",
      "topics": [
        "preprocess",
        "tCorpus$preprocess"
      ]
    },
    {
      "page": "tCorpus-cash-replace_dictionary",
      "title": "Replace tokens with dictionary match",
      "topics": [
        "replace_dictionary",
        "tCorpus$replace_dictionary"
      ]
    },
    {
      "page": "tCorpus-cash-search_recode",
      "title": "Recode features in a tCorpus based on a search string",
      "topics": [
        "search_recode",
        "tCorpus$search_recode"
      ]
    },
    {
      "page": "tCorpus-cash-set",
      "title": "Modify the token and meta data.tables of a tCorpus",
      "topics": [
        "set",
        "set_meta",
        "tCorpus$set",
        "tCorpus$set_meta"
      ]
    },
    {
      "page": "tCorpus-cash-set_levels",
      "title": "Change levels of factor columns",
      "topics": [
        "set_levels",
        "set_meta_levels",
        "tCorpus$set_levels",
        "tCorpus$set_meta_levels"
      ]
    },
    {
      "page": "tCorpus-cash-set_name",
      "title": "Change column names of data and meta data",
      "topics": [
        "set_meta_name",
        "set_name",
        "tCorpus$set_meta_name",
        "tCorpus$set_name"
      ]
    },
    {
      "page": "tCorpus-cash-subset",
      "title": "Subset a tCorpus",
      "topics": [
        "subset",
        "subset_meta",
        "tCorpus$subset",
        "tCorpus$subset_meta"
      ]
    },
    {
      "page": "tCorpus-cash-subset_query",
      "title": "Subset tCorpus token data using a query",
      "topics": [
        "tCorpus$subset_query"
      ]
    },
    {
      "page": "tCorpus-cash-udpipe_clauses",
      "title": "Add columns indicating who did what",
      "topics": [
        "tCorpus$udpipe_clauses",
        "udpipe_clauses"
      ]
    },
    {
      "page": "tCorpus-cash-udpipe_quotes",
      "title": "Add columns indicating who said what",
      "topics": [
        "tCorpus$udpipe_quotes",
        "udpipe_quotes"
      ]
    },
    {
      "page": "tokens_to_tcorpus",
      "title": "Create a tcorpus based on tokens (i.e. preprocessed texts)",
      "topics": [
        "tokens_to_tcorpus"
      ]
    },
    {
      "page": "tokenWindowOccurence",
      "title": "Gives the window in which a term occured in a matrix.",
      "topics": [
        "tokenWindowOccurence"
      ]
    },
    {
      "page": "top_features",
      "title": "Show top features",
      "topics": [
        "top_features"
      ]
    },
    {
      "page": "transform_rsyntax",
      "title": "Apply rsyntax transformations",
      "topics": [
        "transform_rsyntax"
      ]
    },
    {
      "page": "udpipe_clause_tqueries",
      "title": "Get a list of tqueries for extracting who did what",
      "topics": [
        "udpipe_clause_tqueries"
      ]
    },
    {
      "page": "udpipe_quote_tqueries",
      "title": "Get a list of tqueries for extracting quotes",
      "topics": [
        "udpipe_quote_tqueries"
      ]
    },
    {
      "page": "udpipe_simplify",
      "title": "Simplify tokenIndex created with the udpipe parser",
      "topics": [
        "udpipe_simplify"
      ]
    },
    {
      "page": "udpipe_spanquote_tqueries",
      "title": "Get a list of tqueries for finding candidates for span quotes.",
      "topics": [
        "udpipe_spanquote_tqueries"
      ]
    },
    {
      "page": "udpipe_tcorpus",
      "title": "Create a tCorpus using udpipe",
      "topics": [
        "udpipe_tcorpus",
        "udpipe_tcorpus.character",
        "udpipe_tcorpus.corpus",
        "udpipe_tcorpus.data.frame",
        "udpipe_tcorpus.factor"
      ]
    },
    {
      "page": "untokenize",
      "title": "Reconstruct original texts",
      "topics": [
        "untokenize"
      ]
    }
  ],
  "_readme": "https://github.com/kasperwelbers/corpustools/raw/HEAD/README.md",
  "_rundeps": [
    "base64enc",
    "cli",
    "cpp11",
    "data.table",
    "digest",
    "farver",
    "fastmatch",
    "glue",
    "igraph",
    "ISOcodes",
    "jsonlite",
    "labeling",
    "lattice",
    "lifecycle",
    "magrittr",
    "Matrix",
    "pbapply",
    "pkgconfig",
    "png",
    "quanteda",
    "R6",
    "RColorBrewer",
    "Rcpp",
    "RcppEigen",
    "RcppProgress",
    "rlang",
    "RNewsflow",
    "rsyntax",
    "scales",
    "SnowballC",
    "stopwords",
    "stringi",
    "tidyselect",
    "tokenbrowser",
    "udpipe",
    "vctrs",
    "viridisLite",
    "withr",
    "wordcloud",
    "xml2",
    "yaml"
  ],
  "_sysdeps": [
    {
      "shlib": "libstdc++",
      "package": "libstdc++6",
      "source": "gcc",
      "version": "14.2.0-4ubuntu2~24.04.1",
      "name": "c++",
      "homepage": "http://gcc.gnu.org/",
      "description": "GNU Standard C++ Library v3"
    }
  ],
  "_vignettes": [
    {
      "source": "corpustools.Rmd",
      "filename": "corpustools.html",
      "title": "corpustools: Managing, Querying and Analyzing Tokenized Text",
      "author": "by Kasper Welbers and Wouter van Atteveldt",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Introduction",
        "Creating a tcorpus",
        "creating a tcorpus from full-text",
        "Additional options",
        "Importing a tokenlist",
        "Managing a tCorpus",
        "Adding, removing and mutating columns",
        "Subsetting a tCorpus",
        "Deduplication",
        "Preprocessing",
        "Basic preprocessing",
        "Advanced preprocessing with UDPipe",
        "Create_tcorpus keeps a persistent cache",
        "Using multiple cores",
        "Filtering tokens",
        "Creating a DTM or DFM",
        "Why keep the full corpus intact?",
        "Querying the tcorpus",
        "search_features()",
        "Counting hits and plotting",
        "Associations",
        "Inspect results in full text",
        "Adding query hits as token features",
        "search_contexts()",
        "Subset by search_contexts()",
        "search_dictionary",
        "Text analysis techniques",
        "Semantic networks based on co-occurence",
        "Corpus comparisons",
        "Feature associations",
        "Using the tcorpus R6 methods",
        "Being carefull with shallow copies.",
        "Copying a tCorpus"
      ],
      "created": "2019-08-15 15:07:35",
      "modified": "2025-07-10 09:09:04",
      "commits": 16
    }
  ],
  "_score": 7.247089056049106,
  "_indexed": true,
  "_nocasepkg": "corpustools",
  "_universes": [
    "kasperwelbers"
  ],
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "0.5.2",
      "date": "2026-05-10T08:35:42.000Z",
      "distro": "noble",
      "arch": "aarch64",
      "commit": "cf98223c175e39b65e15a50e65675f8407ffc452",
      "fileid": "0de7d670820c07ab83a0fb1d341710afe3417cf118be0d48c673b2a17724b697",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/kasperwelbers/actions/runs/25624039511"
    },
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "0.5.2",
      "date": "2026-05-10T08:35:54.000Z",
      "distro": "noble",
      "arch": "x86_64",
      "commit": "cf98223c175e39b65e15a50e65675f8407ffc452",
      "fileid": "ed146140e71d55c4027e1d66c56518ee3ef498c51a6ef84109a1c13bf84e8429",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/kasperwelbers/actions/runs/25624039511"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "0.5.2",
      "date": "2026-05-10T08:35:44.000Z",
      "distro": "noble",
      "arch": "aarch64",
      "commit": "cf98223c175e39b65e15a50e65675f8407ffc452",
      "fileid": "8a9cc67f3e931748f65a9532780a0f6f28f54ab29b44cac41a81a001fa3ca32a",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/kasperwelbers/actions/runs/25624039511"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "0.5.2",
      "date": "2026-05-10T08:35:51.000Z",
      "distro": "noble",
      "arch": "x86_64",
      "commit": "cf98223c175e39b65e15a50e65675f8407ffc452",
      "fileid": "102bce372138068d5e2f122591adea41eb4bec3cac22c3b56b54bd68ce7c4bbb",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/kasperwelbers/actions/runs/25624039511"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "0.5.2",
      "date": "2026-05-10T08:46:30.000Z",
      "arch": "aarch64",
      "commit": "cf98223c175e39b65e15a50e65675f8407ffc452",
      "fileid": "b86d6aec3286c206895be0aeef7e1776dad1ae07e875cb764365a4b44acf5a32",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/kasperwelbers/actions/runs/25624039511"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "0.5.2",
      "date": "2026-05-10T08:48:21.000Z",
      "arch": "x86_64",
      "commit": "cf98223c175e39b65e15a50e65675f8407ffc452",
      "fileid": "5a605e66d689b9c7ae586e1fc8e6cd4b50ce8c2d56fa632a1d6cc46c75a02ba3",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/kasperwelbers/actions/runs/25624039511"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "0.5.2",
      "date": "2026-05-11T05:37:17.000Z",
      "arch": "aarch64",
      "commit": "cf98223c175e39b65e15a50e65675f8407ffc452",
      "fileid": "fdbb7fd1cceb6b8a74a321e412273f662bcb4c1802882d90565e5ad2fd5918eb",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/kasperwelbers/actions/runs/25624039511"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "0.5.2",
      "date": "2026-05-10T08:51:55.000Z",
      "arch": "x86_64",
      "commit": "cf98223c175e39b65e15a50e65675f8407ffc452",
      "fileid": "137530ca03ba027f10699bb74653617d19deaff586a8832ecf570f8f5583a15d",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/kasperwelbers/actions/runs/25624039511"
    },
    {
      "r": "4.7.0",
      "os": "win",
      "version": "0.5.2",
      "date": "2026-05-10T08:35:24.000Z",
      "arch": "x86_64",
      "commit": "cf98223c175e39b65e15a50e65675f8407ffc452",
      "fileid": "0505af73c1739cec85cde71bbcbfba6527db18822f14c4558e9830ba24ed89dc",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/kasperwelbers/actions/runs/25624039511"
    },
    {
      "r": "4.5.3",
      "os": "win",
      "version": "0.5.2",
      "date": "2026-05-10T08:34:46.000Z",
      "arch": "x86_64",
      "commit": "cf98223c175e39b65e15a50e65675f8407ffc452",
      "fileid": "74ad528fe3e61723e84ee44e393270b421d1f8c27a4c4ff326541bd08cc429e3",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/kasperwelbers/actions/runs/25624039511"
    },
    {
      "r": "4.6.0",
      "os": "win",
      "version": "0.5.2",
      "date": "2026-05-10T08:35:37.000Z",
      "arch": "x86_64",
      "commit": "cf98223c175e39b65e15a50e65675f8407ffc452",
      "fileid": "390f796778fb1880025f292ec3a408adaea380a42c74c2f026dd0f4db2b6080c",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/kasperwelbers/actions/runs/25624039511"
    },
    {
      "r": "4.6.0",
      "os": "wasm",
      "version": "0.5.2",
      "date": "2026-05-22T12:03:15.000Z",
      "arch": "emscripten",
      "commit": "cf98223c175e39b65e15a50e65675f8407ffc452",
      "fileid": "133fe9a0f98296dcbfe2d2fcdadec2017fdffa39978dba84e2656b583e623ca5",
      "status": "success",
      "buildurl": "https://github.com/r-universe/kasperwelbers/actions/runs/25624039511"
    }
  ]
}