{
  "_id": "6a10605cacfb0bcc41ca7127",
  "Package": "doc2vec",
  "Type": "Package",
  "Title": "Distributed Representations of Sentences, Documents and Topics",
  "Version": "0.2.2",
  "Authors@R": "c(\nperson('Jan', 'Wijffels', role = c('aut', 'cre', 'cph'), email = 'jwijffels@bnosac.be', comment = \"R wrapper\"),\nperson('BNOSAC', role = 'cph', comment = \"R wrapper\"),\nperson('hiyijian', role = c('ctb', 'cph'), comment = \"Code in src/doc2vec\"))",
  "Maintainer": "Jan Wijffels <jwijffels@bnosac.be>",
  "Description": "Learn vector representations of sentences, paragraphs or\ndocuments by using the 'Paragraph Vector' algorithms, namely\nthe distributed bag of words ('PV-DBOW') and the distributed\nmemory ('PV-DM') model. The techniques in the package are\ndetailed in the paper \"Distributed Representations of Sentences\nand Documents\" by Mikolov et al. (2014), available at\n<doi:10.48550/arXiv.1405.4053>. The package also provides an\nimplementation to cluster documents based on these embedding\nusing a technique called top2vec. Top2vec finds clusters in\ntext documents by combining techniques to embed documents and\nwords and density-based clustering. It does this by embedding\ndocuments in the semantic space as defined by the 'doc2vec'\nalgorithm. Next it maps these document embeddings to a\nlower-dimensional space using the 'Uniform Manifold\nApproximation and Projection' (UMAP) clustering algorithm and\nfinds dense areas in that space using a 'Hierarchical\nDensity-Based Clustering' technique (HDBSCAN). These dense\nareas are the topic clusters which can be represented by the\ncorresponding topic vector which is an aggregate of the\ndocument embeddings of the documents which are part of that\ntopic cluster. In the same semantic space similar words can be\nfound which are representative of the topic. More details can\nbe found in the paper 'Top2Vec: Distributed Representations of\nTopics' by D. Angelov available at\n<doi:10.48550/arXiv.2008.09470>.",
  "URL": "https://github.com/bnosac/doc2vec",
  "License": "MIT + file LICENSE",
  "Encoding": "UTF-8",
  "LazyData": "true",
  "RoxygenNote": "7.3.2",
  "Repository": "https://bnosac.r-universe.dev",
  "Date/Publication": "2025-11-27 09:58:19 UTC",
  "RemoteUrl": "https://github.com/bnosac/doc2vec",
  "RemoteRef": "HEAD",
  "RemoteSha": "9621e424ac1462ed62413e5f361018ac3cd69d89",
  "NeedsCompilation": "yes",
  "Packaged": {
    "Date": "2026-05-08 08:31:07 UTC",
    "User": "root"
  },
  "Author": "Jan Wijffels [aut, cre, cph] (R wrapper),\nBNOSAC [cph] (R wrapper),\nhiyijian [ctb, cph] (Code in src/doc2vec)",
  "MD5sum": "0437f5adea059902a799f7b17035408f",
  "_user": "bnosac",
  "_type": "src",
  "_file": "doc2vec_0.2.2.tar.gz",
  "_fileid": "2ebb2dffebaafa875a6158b1901727273fec2a68f209010d9bda09a2ba10b6ef",
  "_filesize": 3236285,
  "_sha256": "2ebb2dffebaafa875a6158b1901727273fec2a68f209010d9bda09a2ba10b6ef",
  "_created": "2026-05-08T08:31:07.000Z",
  "_published": "2026-05-22T13:55:40.192Z",
  "_distro": "noble",
  "_jobs": [
    {
      "job": 77393966742,
      "time": 156,
      "config": "linux-devel-arm64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "6875119630"
    },
    {
      "job": 77393967066,
      "time": 164,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "6875123709"
    },
    {
      "job": 77393967215,
      "time": 163,
      "config": "linux-release-arm64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6875122035"
    },
    {
      "job": 77393967062,
      "time": 154,
      "config": "linux-release-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6875119155"
    },
    {
      "job": 77393967320,
      "time": 163,
      "config": "macos-oldrel-arm64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "6875161706"
    },
    {
      "job": 77393967103,
      "time": 194,
      "config": "macos-oldrel-x86_64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "6875151817"
    },
    {
      "job": 77393966626,
      "time": 160,
      "config": "macos-release-arm64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6875172267"
    },
    {
      "job": 77393966545,
      "time": 255,
      "config": "macos-release-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6875180134"
    },
    {
      "job": 77393966368,
      "time": 147,
      "config": "source",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6875074002"
    },
    {
      "job": 77393966035,
      "time": 130,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7162238446"
    },
    {
      "job": 77393967116,
      "time": 218,
      "config": "windows-devel",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "6875137864"
    },
    {
      "job": 77393967162,
      "time": 188,
      "config": "windows-oldrel",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "6875129234"
    },
    {
      "job": 77393967153,
      "time": 189,
      "config": "windows-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6875130478"
    }
  ],
  "_buildurl": "https://github.com/r-universe/bnosac/actions/runs/25545535621",
  "_status": "success",
  "_host": "GitHub-Actions",
  "_upstream": "https://github.com/bnosac/doc2vec",
  "_commit": {
    "id": "9621e424ac1462ed62413e5f361018ac3cd69d89",
    "author": "Jan Wijffels <jwijffels@bnosac.be>",
    "committer": "Jan Wijffels <jwijffels@bnosac.be>",
    "message": "README\n",
    "time": 1764237499
  },
  "_maintainer": {
    "name": "Jan Wijffels",
    "email": "jwijffels@bnosac.be",
    "login": "jwijffels",
    "uuid": 1710810
  },
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 2.10",
      "role": "Depends"
    },
    {
      "package": "Rcpp",
      "role": "LinkingTo"
    },
    {
      "package": "Rcpp",
      "version": ">= 0.11.5",
      "role": "Imports"
    },
    {
      "package": "stats",
      "role": "Imports"
    },
    {
      "package": "utils",
      "role": "Imports"
    },
    {
      "package": "tokenizers.bpe",
      "role": "Suggests"
    },
    {
      "package": "word2vec",
      "version": ">= 0.3.3",
      "role": "Suggests"
    },
    {
      "package": "uwot",
      "role": "Suggests"
    },
    {
      "package": "dbscan",
      "role": "Suggests"
    },
    {
      "package": "udpipe",
      "version": ">= 0.8",
      "role": "Suggests"
    }
  ],
  "_owner": "bnosac",
  "_selfowned": true,
  "_usedby": 0,
  "_updates": [
    {
      "week": "2025-48",
      "n": 5
    }
  ],
  "_tags": [
    {
      "name": "0.2.2",
      "date": "2025-11-27"
    }
  ],
  "_topics": [
    "doc2vec",
    "embeddings",
    "natural-language-processing",
    "paragraph2vec",
    "word2vec",
    "cpp"
  ],
  "_stars": 51,
  "_contributors": [
    {
      "user": "jwijffels",
      "count": 88,
      "uuid": 1710810
    }
  ],
  "_userbio": {
    "uuid": 20105826,
    "type": "organization",
    "name": "bnosac",
    "description": "open sourced projects"
  },
  "_downloads": {
    "count": 338,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/doc2vec"
  },
  "_mentions": 27,
  "_devurl": "https://github.com/bnosac/doc2vec",
  "_searchresults": 39,
  "_rbuild": "4.6.0",
  "_assets": [
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "extra/doc2vec.html",
    "extra/NEWS.html",
    "extra/NEWS.txt",
    "extra/readme.html",
    "extra/readme.md",
    "manual.pdf"
  ],
  "_homeurl": "https://github.com/bnosac/doc2vec",
  "_realowner": "bnosac",
  "_cranurl": true,
  "_releases": [
    {
      "version": "0.1.0",
      "date": "2020-12-10"
    },
    {
      "version": "0.1.1",
      "date": "2021-01-21"
    },
    {
      "version": "0.2.0",
      "date": "2021-03-28"
    },
    {
      "version": "0.2.2",
      "date": "2025-11-27"
    }
  ],
  "_exports": [
    "paragraph2vec",
    "paragraph2vec_similarity",
    "read.paragraph2vec",
    "top2vec",
    "txt_count_words",
    "write.paragraph2vec"
  ],
  "_datasets": [
    {
      "name": "be_parliament_2020",
      "title": "Corpus with Questions asked in the Belgium Federal Parliament in 2020",
      "object": "be_parliament_2020",
      "class": [
        "data.frame"
      ],
      "fields": [
        "doc_id",
        "text_nl",
        "text_fr"
      ],
      "rows": 6059,
      "table": true,
      "tojson": true
    }
  ],
  "_help": [
    {
      "page": "as.matrix.paragraph2vec",
      "title": "Get the document or word vectors of a paragraph2vec model",
      "topics": [
        "as.matrix.paragraph2vec"
      ]
    },
    {
      "page": "be_parliament_2020",
      "title": "Corpus with Questions asked in the Belgium Federal Parliament in 2020",
      "topics": [
        "be_parliament_2020"
      ]
    },
    {
      "page": "paragraph2vec",
      "title": "Train a paragraph2vec also known as doc2vec model on text",
      "topics": [
        "paragraph2vec"
      ]
    },
    {
      "page": "paragraph2vec_similarity",
      "title": "Similarity between document / word vectors as used in paragraph2vec",
      "topics": [
        "paragraph2vec_similarity"
      ]
    },
    {
      "page": "predict.paragraph2vec",
      "title": "Predict functionalities for a paragraph2vec model",
      "topics": [
        "predict.paragraph2vec"
      ]
    },
    {
      "page": "read.paragraph2vec",
      "title": "Read a binary paragraph2vec model from disk",
      "topics": [
        "read.paragraph2vec"
      ]
    },
    {
      "page": "summary.top2vec",
      "title": "Get summary information of a top2vec model",
      "topics": [
        "summary.top2vec"
      ]
    },
    {
      "page": "top2vec",
      "title": "Distributed Representations of Topics",
      "topics": [
        "top2vec"
      ]
    },
    {
      "page": "txt_count_words",
      "title": "Count the number of spaces occurring in text",
      "topics": [
        "txt_count_words"
      ]
    },
    {
      "page": "update.top2vec",
      "title": "Update a Top2vec model",
      "topics": [
        "update.top2vec"
      ]
    },
    {
      "page": "write.paragraph2vec",
      "title": "Save a paragraph2vec model to disk",
      "topics": [
        "write.paragraph2vec"
      ]
    }
  ],
  "_readme": "https://github.com/bnosac/doc2vec/raw/HEAD/README.md",
  "_rundeps": [
    "Rcpp"
  ],
  "_sysdeps": [
    {
      "shlib": "libstdc++",
      "package": "libstdc++6",
      "source": "gcc",
      "version": "14.2.0-4ubuntu2~24.04.1",
      "name": "c++",
      "homepage": "http://gcc.gnu.org/",
      "description": "GNU Standard C++ Library v3"
    }
  ],
  "_score": 5.997604787460454,
  "_indexed": true,
  "_nocasepkg": "doc2vec",
  "_universes": [
    "bnosac",
    "jwijffels"
  ],
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "0.2.2",
      "date": "2026-05-08T08:33:26.000Z",
      "distro": "noble",
      "arch": "aarch64",
      "commit": "9621e424ac1462ed62413e5f361018ac3cd69d89",
      "fileid": "6385d37e07ced850d6083935127daac9eb3b2375c2211bdffedc9f3ce1417528",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/bnosac/actions/runs/25545535621"
    },
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "0.2.2",
      "date": "2026-05-08T08:33:36.000Z",
      "distro": "noble",
      "arch": "x86_64",
      "commit": "9621e424ac1462ed62413e5f361018ac3cd69d89",
      "fileid": "33dee035b88c53e4f2eef8bf10b70aa2f3a1ca8c27153e26c9c6e08150597d3e",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/bnosac/actions/runs/25545535621"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "0.2.2",
      "date": "2026-05-08T08:33:36.000Z",
      "distro": "noble",
      "arch": "aarch64",
      "commit": "9621e424ac1462ed62413e5f361018ac3cd69d89",
      "fileid": "5a96bc0dc143cff553171dba374a17459b07ff6e1abdc547f26ec84c99cf4b7e",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/bnosac/actions/runs/25545535621"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "0.2.2",
      "date": "2026-05-08T08:33:23.000Z",
      "distro": "noble",
      "arch": "x86_64",
      "commit": "9621e424ac1462ed62413e5f361018ac3cd69d89",
      "fileid": "d6f54d38043595f76887607048dcf45acba5705f99037f56bbd3fc0235e5d9a2",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/bnosac/actions/runs/25545535621"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "0.2.2",
      "date": "2026-05-08T08:35:42.000Z",
      "arch": "aarch64",
      "commit": "9621e424ac1462ed62413e5f361018ac3cd69d89",
      "fileid": "68292ed60359dbb7795c6444b6a1beb708af03c92229bc20b4bcbb9160be0e5d",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/bnosac/actions/runs/25545535621"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "0.2.2",
      "date": "2026-05-08T08:34:50.000Z",
      "arch": "x86_64",
      "commit": "9621e424ac1462ed62413e5f361018ac3cd69d89",
      "fileid": "97bf8abf4c3a824786bad8d30e62e602e00faef5fd7c19b316e2ba4c57a4f42c",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/bnosac/actions/runs/25545535621"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "0.2.2",
      "date": "2026-05-08T08:36:10.000Z",
      "arch": "aarch64",
      "commit": "9621e424ac1462ed62413e5f361018ac3cd69d89",
      "fileid": "80374829d286c41942fbd5c41dcd419112f32b450d54b616672cd5138bbd6867",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/bnosac/actions/runs/25545535621"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "0.2.2",
      "date": "2026-05-08T08:35:56.000Z",
      "arch": "x86_64",
      "commit": "9621e424ac1462ed62413e5f361018ac3cd69d89",
      "fileid": "a8d0543aa6abd6a422ffced034de342652196c03ec0c8887050ce7728e81cdc6",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/bnosac/actions/runs/25545535621"
    },
    {
      "r": "4.7.0",
      "os": "win",
      "version": "0.2.2",
      "date": "2026-05-08T08:33:06.000Z",
      "arch": "x86_64",
      "commit": "9621e424ac1462ed62413e5f361018ac3cd69d89",
      "fileid": "ebb38bd359895eb7de7fb2f19cae5f4f6d00f85e9d1bd39494f4090a35a266b8",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/bnosac/actions/runs/25545535621"
    },
    {
      "r": "4.5.3",
      "os": "win",
      "version": "0.2.2",
      "date": "2026-05-08T08:33:07.000Z",
      "arch": "x86_64",
      "commit": "9621e424ac1462ed62413e5f361018ac3cd69d89",
      "fileid": "a97a87cc9722cd2c5d4c3dec4ff65ff59da6e9e0ad4310feb4993fa882bf7d36",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/bnosac/actions/runs/25545535621"
    },
    {
      "r": "4.6.0",
      "os": "win",
      "version": "0.2.2",
      "date": "2026-05-08T08:33:08.000Z",
      "arch": "x86_64",
      "commit": "9621e424ac1462ed62413e5f361018ac3cd69d89",
      "fileid": "b1758962292d163c242ea891ad4649085308a8b9b0be4f0bb3ba7c6f39684ac8",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/bnosac/actions/runs/25545535621"
    },
    {
      "r": "4.6.0",
      "os": "wasm",
      "version": "0.2.2",
      "date": "2026-05-22T13:55:03.000Z",
      "arch": "emscripten",
      "commit": "9621e424ac1462ed62413e5f361018ac3cd69d89",
      "fileid": "643c930a8bffc9adc43b5cf81a5380bdac75cbf24c635002336e328ae35b4c6b",
      "status": "success",
      "buildurl": "https://github.com/r-universe/bnosac/actions/runs/25545535621"
    }
  ]
}