Compare commits

...

No commits in common. "4.1.0" and "4.5.0" have entirely different histories.
4.1.0 ... 4.5.0

103 changed files with 12842 additions and 191 deletions

@@ -1,3 +1,3 @@
-4.1.0
-last_version: 4.0.0
-source_branch: feature/SIENTIAPDE-1379
+4.5.0
+last_version: 4.4.0
+source_branch: feature/mirror-update-pr-10

@@ -1,134 +0,0 @@
#!/bin/bash
# Applies current directory content as a patch to an existing git repository
#
# Usage: ./git-apply-patch.sh <config-file>
#
# Config file format:
# GIT_URL=https://github.com/user/repo.git
# GIT_USER=username
# GIT_TOKEN=your_token_or_password
#
# VERSION file format:
# <new_version>
# last_version: <previous_version>
# source_branch: <branch_name> (optional)
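#
# Example VERSION file (hypothetical values, matching the format above):
#   4.5.0
#   last_version: 4.4.0
#   source_branch: feature/my-change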
set -e
if [ -z "$1" ]; then
echo "Usage: $0 <config-file>"
exit 1
fi
CONFIG_FILE="$1"
if [ ! -f "$CONFIG_FILE" ]; then
echo "Error: Config file '$CONFIG_FILE' not found"
exit 1
fi
# Load config file
source "$CONFIG_FILE"
# Validate required fields
if [ -z "$GIT_URL" ]; then
echo "Error: GIT_URL not defined in config file"
exit 1
fi
if [ -z "$GIT_USER" ]; then
echo "Error: GIT_USER not defined in config file"
exit 1
fi
if [ -z "$GIT_TOKEN" ]; then
echo "Error: GIT_TOKEN not defined in config file"
exit 1
fi
# Read VERSION file
if [ ! -f "VERSION" ]; then
echo "Error: VERSION file not found in current directory"
exit 1
fi
NEW_VERSION=$(sed -n '1p' VERSION)
LAST_VERSION=$(sed -n '2p' VERSION | sed 's/last_version: //')
SOURCE_BRANCH=$(sed -n '3p' VERSION | sed 's/source_branch: //')
if [ -z "$NEW_VERSION" ]; then
echo "Error: New version not found in VERSION file (line 1)"
exit 1
fi
if [ -z "$LAST_VERSION" ]; then
echo "Error: last_version not found in VERSION file (line 2)"
exit 1
fi
echo "New version: $NEW_VERSION"
echo "Last version: $LAST_VERSION"
echo "Source branch: ${SOURCE_BRANCH:-N/A}"
# Build authenticated URL
if [[ "$GIT_URL" == https://* ]]; then
URL_WITHOUT_PROTOCOL="${GIT_URL#https://}"
AUTH_URL="https://${GIT_USER}:${GIT_TOKEN}@${URL_WITHOUT_PROTOCOL}"
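# e.g. (hypothetical) https://user:token@github.com/user/repo.git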
elif [[ "$GIT_URL" == http://* ]]; then
URL_WITHOUT_PROTOCOL="${GIT_URL#http://}"
AUTH_URL="http://${GIT_USER}:${GIT_TOKEN}@${URL_WITHOUT_PROTOCOL}"
else
echo "Error: URL must start with http:// or https://"
exit 1
fi
# Create temp directory for cloning
TEMP_DIR=$(mktemp -d)
CURRENT_DIR=$(pwd)
echo ""
echo "Cloning repository from branch '$LAST_VERSION'..."
git clone --branch "$LAST_VERSION" --single-branch "$AUTH_URL" "$TEMP_DIR" 2>/dev/null || {
echo "Branch '$LAST_VERSION' not found, trying to clone default branch..."
git clone "$AUTH_URL" "$TEMP_DIR"
cd "$TEMP_DIR"
git checkout -b "$LAST_VERSION" 2>/dev/null || git checkout "$LAST_VERSION"
cd "$CURRENT_DIR"
}
echo "Copying current files to cloned repository..."
# Copy all files except .git, the test dir, the temp clone dir, and the config file
rsync -av \
--exclude='.git' \
--exclude='test' \
--exclude="$(basename "$TEMP_DIR")" \
--exclude="$CONFIG_FILE" \
"$CURRENT_DIR/" "$TEMP_DIR/"
cd "$TEMP_DIR"
echo "Creating new branch '$NEW_VERSION'..."
git checkout -b "$NEW_VERSION" 2>/dev/null || git checkout "$NEW_VERSION"
echo "Adding changes..."
git add -A
echo "Committing changes..."
COMMIT_MSG="Release $NEW_VERSION"
if [ -n "$SOURCE_BRANCH" ]; then
COMMIT_MSG="$COMMIT_MSG (from $SOURCE_BRANCH)"
fi
git commit -m "$COMMIT_MSG" || echo "Nothing to commit"
echo "Pushing to remote..."
git push -u origin "$NEW_VERSION"
# Cleanup
cd "$CURRENT_DIR"
rm -rf "$TEMP_DIR"
echo ""
echo "Done! Patch applied and pushed to branch '$NEW_VERSION'"
echo "Based on previous version: '$LAST_VERSION'"

@@ -1,8 +0,0 @@
# Git repository configuration
# Copy this file and fill with your credentials
# NEVER commit this file with real credentials!
GIT_URL=http://localhost:35703/aignosi/library-distribution-mirror.git
GIT_USER=aignosi
GIT_TOKEN=r8sA8CPHD9!bt6d

@@ -1,62 +1,53 @@
 #!/bin/bash
-# Initializes a git repository and pushes to a remote using credentials from a config file
+# Initializes a git repository and pushes to a remote using credentials from arguments
 #
-# Usage: ./git-init-from-config.sh <config-file>
+# Usage: ./git-init-from-config.sh <git-url> <git-user> <git-token>
 #
-# Config file format (one value per line):
-# GIT_URL=https://github.com/user/repo.git
-# GIT_USER=username
-# GIT_TOKEN=your_token_or_password
+# Arguments:
+# git-url - Repository URL (e.g., https://gitea.example.com/user/repo.git)
+# git-user - Git username
+# git-token - Git token or password
 #
 # The branch name will be read from the VERSION file (first line)
 set -e
-if [ -z "$1" ]; then
-echo "Usage: $0 <config-file>"
+if [ "$#" -lt 3 ]; then
+echo "Usage: $0 <git-url> <git-user> <git-token>"
 echo ""
-echo "Config file format:"
-echo " GIT_URL=https://github.com/user/repo.git"
-echo " GIT_USER=username"
-echo " GIT_TOKEN=your_token_or_password"
-echo " GIT_BRANCH=main (optional)"
+echo "Arguments:"
+echo " git-url - Repository URL (e.g., https://gitea.example.com/user/repo.git)"
+echo " git-user - Git username"
+echo " git-token - Git token or password"
 exit 1
 fi
-CONFIG_FILE="$1"
-if [ ! -f "$CONFIG_FILE" ]; then
-echo "Error: Config file '$CONFIG_FILE' not found"
-exit 1
-fi
-# Load config file
-source "$CONFIG_FILE"
-# Validate required fields
+GIT_URL="$1"
+GIT_USER="$2"
+GIT_TOKEN="$3"
 if [ -z "$GIT_URL" ]; then
-echo "Error: GIT_URL not defined in config file"
+echo "Error: git-url is required"
 exit 1
 fi
 if [ -z "$GIT_USER" ]; then
-echo "Error: GIT_USER not defined in config file"
+echo "Error: git-user is required"
 exit 1
 fi
 if [ -z "$GIT_TOKEN" ]; then
-echo "Error: GIT_TOKEN not defined in config file"
+echo "Error: git-token is required"
 exit 1
 fi
-# Read version from VERSION file (first line)
 if [ ! -f "VERSION" ]; then
 echo "Error: VERSION file not found in current directory"
 exit 1
 fi
-GIT_BRANCH=$(head -n 1 VERSION)
+GIT_BRANCH=$(head -n 1 VERSION | tr -d '[:space:]')
 if [ -z "$GIT_BRANCH" ]; then
 echo "Error: VERSION file is empty"
@@ -65,8 +56,6 @@ fi
 echo "Version detected: $GIT_BRANCH"
-# Build authenticated URL
-# Extract protocol and rest of URL
 if [[ "$GIT_URL" == https://* ]]; then
 URL_WITHOUT_PROTOCOL="${GIT_URL#https://}"
 AUTH_URL="https://${GIT_USER}:${GIT_TOKEN}@${URL_WITHOUT_PROTOCOL}"
@@ -99,4 +88,3 @@ git push -u origin "$GIT_BRANCH"
 echo ""
 echo "Done! Repository pushed to $GIT_URL on branch $GIT_BRANCH"

Binary file not shown.

@@ -0,0 +1,590 @@
Metadata-Version: 2.4
Name: airium
Version: 0.2.7
Summary: Easy and quick html builder with natural syntax correspondence (python->html). No templates needed. Serves pure pythonic library with no dependencies.
Home-page: https://gitlab.com/kamichal/airium
Author: Michał Kaczmarczyk
Author-email: michal.s.kaczmarczyk@gmail.com
Maintainer: Michał Kaczmarczyk
Maintainer-email: michal.s.kaczmarczyk@gmail.com
License: MIT
Keywords: natural html generator compiler template-less
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Information Technology
Classifier: Intended Audience :: Science/Research
Classifier: Intended Audience :: System Administrators
Classifier: Intended Audience :: Telecommunications Industry
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Programming Language :: Python
Classifier: Topic :: Database :: Front-Ends
Classifier: Topic :: Documentation
Classifier: Topic :: Internet :: WWW/HTTP :: Browsers
Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content
Classifier: Topic :: Internet :: WWW/HTTP
Classifier: Topic :: Scientific/Engineering :: Visualization
Classifier: Topic :: Software Development :: Code Generators
Classifier: Topic :: Text Processing :: Markup :: HTML
Classifier: Topic :: Utilities
Description-Content-Type: text/markdown
License-File: LICENSE
Provides-Extra: dev
Requires-Dist: check-manifest; extra == "dev"
Requires-Dist: flake8~=7.1; extra == "dev"
Requires-Dist: mypy~=1.10; extra == "dev"
Requires-Dist: pytest-cov~=3.0; extra == "dev"
Requires-Dist: pytest-mock~=3.6; extra == "dev"
Requires-Dist: pytest~=6.2; extra == "dev"
Requires-Dist: types-beautifulsoup4~=4.12; extra == "dev"
Requires-Dist: types-requests~=2.32; extra == "dev"
Provides-Extra: parse
Requires-Dist: requests<3,>=2.12.0; extra == "parse"
Requires-Dist: beautifulsoup4<5.0,>=4.10.0; extra == "parse"
Dynamic: author
Dynamic: author-email
Dynamic: classifier
Dynamic: description
Dynamic: description-content-type
Dynamic: home-page
Dynamic: keywords
Dynamic: license
Dynamic: license-file
Dynamic: maintainer
Dynamic: maintainer-email
Dynamic: provides-extra
Dynamic: summary
## Airium
Bidirectional `HTML`-`python` translator.
[![PyPI version](https://img.shields.io/pypi/v/airium.svg)](https://pypi.python.org/pypi/airium/)
[![pipeline status](https://gitlab.com/kamichal/airium/badges/master/pipeline.svg)](https://gitlab.com/kamichal/airium/-/commits/master)
[![coverage report](https://gitlab.com/kamichal/airium/badges/master/coverage.svg)](https://gitlab.com/kamichal/airium/-/commits/master)
[![PyPI pyversion](https://img.shields.io/pypi/pyversions/AIRIUM.svg)](https://pypi.org/project/airium/)
[![PyPI license](https://img.shields.io/pypi/l/AIRIUM.svg)](https://pypi.python.org/pypi/airium/)
[![PyPI status](https://img.shields.io/pypi/status/AIRIUM.svg)](https://pypi.python.org/pypi/airium/)
Key features:
- simple, straight-forward
- template-less (just the python, you may say goodbye to all the templates)
- DOM structure is strictly represented by python indentation (with context-managers)
- gives much cleaner `HTML` than regular templates
- equipped with reverse translator: `HTML` to python
- can output either pretty (default) or minified `HTML` code
# Generating `HTML` code in python using `airium`
#### Basic `HTML` page (hello world)
```python
from airium import Airium

a = Airium()

a('<!DOCTYPE html>')
with a.html(lang="pl"):
    with a.head():
        a.meta(charset="utf-8")
        a.title(_t="Airium example")
    with a.body():
        with a.h3(id="id23409231", klass='main_header'):
            a("Hello World.")

html = str(a)  # casting to string extracts the value
# or directly to UTF-8 encoded bytes:
html_bytes = bytes(a)  # casting to bytes is a shortcut to str(a).encode('utf-8')

print(html)
```
Prints such a string:
```html
<!DOCTYPE html>
<html lang="pl">
  <head>
    <meta charset="utf-8" />
    <title>Airium example</title>
  </head>
  <body>
    <h3 id="id23409231" class="main_header">
      Hello World.
    </h3>
  </body>
</html>
```
In order to store it as a file, just:
```python
with open('that/file/path.html', 'wb') as f:
    f.write(bytes(a))  # bytes(a) is str(a) encoded as UTF-8
```
#### Simple image in a div
```python
from airium import Airium

a = Airium()

with a.div():
    a.img(src='source.png', alt='alt text')
    a('the text')

html_str = str(a)
print(html_str)
```
```html
<div>
  <img src="source.png" alt="alt text"/>
  the text
</div>
```
#### Table
```python
from airium import Airium

a = Airium()

with a.table(id='table_372'):
    with a.tr(klass='header_row'):
        a.th(_t='no.')
        a.th(_t='Firstname')
        a.th(_t='Lastname')
    with a.tr():
        a.td(_t='1.')
        a.td(id='jbl', _t='Jill')
        a.td(_t='Smith')  # can use _t or text
    with a.tr():
        a.td(_t='2.')
        a.td(_t='Roland', id='rmd')
        a.td(_t='Mendel')

table_str = str(a)
print(table_str)

# To store it to a file:
with open('/tmp/airium_www.example.com.py', 'w') as f:
    f.write(table_str)
```
Now `table_str` contains such a string:
```html
<table id="table_372">
  <tr class="header_row">
    <th>no.</th>
    <th>Firstname</th>
    <th>Lastname</th>
  </tr>
  <tr>
    <td>1.</td>
    <td id="jbl">Jill</td>
    <td>Smith</td>
  </tr>
  <tr>
    <td>2.</td>
    <td id="rmd">Roland</td>
    <td>Mendel</td>
  </tr>
</table>
```
### Chaining shortcut for elements with only one child
_New in version 0.2.2_
Having a structure with a large number of `with` statements:
```python
from airium import Airium

a = Airium()

with a.article():
    with a.table():
        with a.thead():
            with a.tr():
                a.th(_t='Column 1')
                a.th(_t='Column 2')
        with a.tbody():
            with a.tr():
                with a.td():
                    a.strong(_t='Value 1')
                a.td(_t='Value 2')

table_str = str(a)
print(table_str)
```
You may use a shortcut that is equivalent to:
```python
from airium import Airium

a = Airium()

with a.article().table():
    with a.thead().tr():
        a.th(_t="Column 1")
        a.th(_t="Column 2")
    with a.tbody().tr():
        a.td().strong(_t="Value 1")
        a.td(_t="Value 2")

table_str = str(a)
print(table_str)
```
```html
<article>
  <table>
    <thead>
      <tr>
        <th>Column 1</th>
        <th>Column 2</th>
      </tr>
    </thead>
    <tbody>
      <tr>
        <td>
          <strong>Value 1</strong>
        </td>
        <td>Value 2</td>
      </tr>
    </tbody>
  </table>
</article>
```
# Options
### Pretty or Minify
By default, airium builds `HTML` code indented with spaces and with line breaks as line feed (`\n`) characters.
This can be changed when creating an `Airium` instance. All available arguments with their default values are:
```python
a = Airium(
    base_indent='  ',  # str
    current_level=0,  # int
    source_minify=False,  # bool
    source_line_break_character="\n",  # str
)
```
#### minify
That's a mode in which the size of the code is minimized, i.e. it contains as little whitespace as possible.
The option can be enabled with the `source_minify` argument, i.e.:
```python
a = Airium(source_minify=True)
```
If you need to explicitly add a line break in the source code (not a `<br/>`):
```python
a = Airium(source_minify=True)

a.h1(_t="Here's your table")
with a.table():
    with a.tr():
        a.break_source_line()
        a.th(_t="Cell 11")
        a.th(_t="Cell 12")
    with a.tr():
        a.break_source_line()
        a.th(_t="Cell 21")
        a.th(_t="Cell 22")
a.break_source_line()
a.p(_t="Another content goes here")
```
It will produce code like this:
```html
<h1>Here's your table</h1><table><tr>
<th>Cell 11</th><th>Cell 12</th></tr><tr>
<th>Cell 21</th><th>Cell 22</th></tr>
</table><p>Another content goes here</p>
```
Note that the `break_source_line` cannot be used
in [context manager chains](#chaining-shortcut-for-elements-with-only-one-child).
#### indent style
The default indent of the generated HTML code is two spaces per indent level.
You can change it to `\t` or 4 spaces by setting the corresponding `Airium` constructor argument, e.g.:
```python
a = Airium(base_indent="\t")  # one tab symbol
a = Airium(base_indent="    ")  # 4 spaces per each indentation level
a = Airium(base_indent=" ")  # 1 space per one level
# pick one of the above statements, it can be mixed with other arguments
```
Note that this setting is ignored when `source_minify` argument is set to `True` (see above).
There is a special case when you set the base indent to an empty string. It disables indentation,
but line breaks are still added. To get rid of line breaks as well, check the `source_minify` argument.
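For example, a minimal sketch of the expected shape:

```python
from airium import Airium

a = Airium(base_indent='')  # no indentation, but line breaks are kept
with a.div():
    a.span(_t='flat')
print(str(a))
# <div>
# <span>flat</span>
# </div>
```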
#### indent level
The `current_level` argument is an integer that can be set to a non-negative
value, which will cause `airium` to start indentation with the level offset given by that number.
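For illustration, a minimal sketch (the exact rendering may vary between versions):

```python
from airium import Airium

a = Airium(current_level=2)  # start two indentation levels deep
with a.div():
    a.span(_t='offset')
print(str(a))
# With the default two-space indent, the markup comes out as if it
# were already nested two levels, e.g. "    <div>" instead of "<div>".
```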
#### line break character
By default, just a line feed (`\n`) is used for terminating lines of the generated code.
You can change it to a different style, e.g. `\r\n` or `\r`, by setting `source_line_break_character` to the desired value.
```python
a = Airium(source_line_break_character="\r\n") # windows' style
```
Note that the setting has no effect when `source_minify` argument is set to `True` (see above).
# Using airium with web-frameworks
Airium can be used with frameworks like Flask or Django. It can completely replace
template engines, reducing the scatter of code files, which may bring better code organization, among other benefits.
Here is an example of using airium with django. It implements reusable `basic_body` and a view called `index`.
```python
# file: your_app/views.py
import contextlib
import inspect

from airium import Airium
from django.http import HttpResponse


@contextlib.contextmanager
def basic_body(a: Airium, useful_name: str = ''):
    """Works like a Django/Ninja template."""
    a('<!DOCTYPE html>')
    with a.html(lang='en'):
        with a.head():
            a.meta(charset='utf-8')
            a.meta(content='width=device-width, initial-scale=1', name='viewport')
            # do not use CSS from this URL in a production, it's just for an educational purpose
            a.link(href='https://unpkg.com/@picocss/pico@1.4.1/css/pico.css', rel='stylesheet')
            a.title(_t='Hello World')
        with a.body():
            with a.div():
                with a.nav(klass='container-fluid'):
                    with a.ul():
                        with a.li():
                            with a.a(klass='contrast', href='./'):
                                a.strong(_t="⌨ Foo Bar")
                    with a.ul():
                        with a.li():
                            a.a(klass='contrast', href='#', **{'data-theme-switcher': 'auto'}, _t='Auto')
                        with a.li():
                            a.a(klass='contrast', href='#', **{'data-theme-switcher': 'light'}, _t='Light')
                        with a.li():
                            a.a(klass='contrast', href='#', **{'data-theme-switcher': 'dark'}, _t='Dark')
            with a.header(klass='container'):
                with a.hgroup():
                    a.h1(_t=f"You're on the {useful_name}")
                    a.h2(_t="It's a page made by our automatons with a power of steam engines.")
            with a.main(klass='container'):
                yield  # This is the point where main content gets inserted
            with a.footer(klass='container'):
                with a.small():
                    margin = 'margin: auto 10px;'
                    a.span(_t='© Airium HTML generator example', style=margin)
            # do not use JS from this URL in a production, it's just for an educational purpose
            a.script(src='https://picocss.com/examples/js/minimal-theme-switcher.js')


def index(request) -> HttpResponse:
    a = Airium()
    with basic_body(a, f'main page: {request.path}'):
        with a.article():
            a.h3(_t="Hello World from Django running Airium")
            with a.p().small():
                a("This bases on ")
                with a.a(href="https://picocss.com/examples/company/"):
                    a("Pico.css / Company example")
            with a.p():
                a("Instead of a HTML template, airium has been used.")
                a("The whole body is generated by a template "
                  "and the article code looks like that:")
            with a.code().pre():
                a(inspect.getsource(index))
    return HttpResponse(bytes(a))  # from django.http import HttpResponse
```
Route it in `urls.py` just like a regular view:
```python
# file: your_app/urls.py
from django.contrib import admin
from django.urls import path

import your_app.views

urlpatterns = [
    path('index/', your_app.views.index),
    path('admin/', admin.site.urls),
]
```
The resulting web page on my machine looks like this:
![Airium/Django templateless example](airium_django_example.png)
# Reverse translation
Airium is equipped with a transpiler `[HTML -> py]`.
It generates python code out of a given `HTML` string.
### Using reverse translator as a binary:
Ensure you have [installed](#installation) the `[parse]` extras. Then run on the command line:
```bash
airium http://www.example.com
```
That will fetch the document and translate it to python code.
The code calls `airium` statements that reproduce the given `HTML` document.
It may give a clue as to how to define the `HTML` structure for a given
web page using the `airium` package.
To store the translation's result into a file:
```bash
airium http://www.example.com > /tmp/airium_example_com.py
```
You can also parse local `HTML` files:
```bash
airium /path/to/your_file.html > /tmp/airium_my_file.py
```
You may also try to parse your Django templates. I'm not sure if it works,
but there will probably not be much to fix.
### Using reverse translator as python code:
```python
from airium import from_html_to_airium

# assume we have such a page given as a string:
html_str = """\
<!DOCTYPE html>
<html lang="pl">
  <head>
    <meta charset="utf-8" />
    <title>Airium example</title>
  </head>
  <body>
    <h3 id="id23409231" class="main_header">
      Hello World.
    </h3>
  </body>
</html>
"""

# to convert the html into python, just call:
py_str = from_html_to_airium(html_str)

# airium tests ensure that the result of the conversion is equal to the string:
assert py_str == """\
#!/usr/bin/env python
# File generated by reverse AIRIUM translator (version 0.2.7).
# Any change will be overridden on next run.
# flake8: noqa E501 (line too long)

from airium import Airium

a = Airium()

a('<!DOCTYPE html>')
with a.html(lang='pl'):
    with a.head():
        a.meta(charset='utf-8')
        a.title(_t='Airium example')
    with a.body():
        a.h3(klass='main_header', id='id23409231', _t='Hello World.')
"""
```
### <a name="transpiler_limitations">Transpiler limitations</a>
> as of version 0.2.2:
- the result of translation does not keep the exact amount of leading whitespace
within `<pre>` tags; it comes out over-indented in the python code.
This is not, however, an issue when code is generated from python to `HTML`.
- although it keeps the proper tag structure, the transpiler does not
chain all the `with` statements, so in some cases the generated
code may be deeply indented.
- it's not too fast
# <a name="installation">Installation</a>
If you need a new virtual environment, call:
```bash
virtualenv venv
source venv/bin/activate
```
Having it activated - you may install airium like this:
```bash
pip install airium
```
In order to use reverse translation - two additional packages are needed, run:
```bash
pip install airium[parse]
```
Then check if the transpiler works by calling:
```bash
airium --help
```
> Enjoy!

mirror/airium/index.html (new file)

@@ -0,0 +1,20 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for airium
</title>
</head>
<body>
<h1>
Links for airium
</h1>
<a href="/airium/airium-0.2.7-py3-none-any.whl#sha256=35e3ae334327b17b7c2fc39bb57ab2c48171ca849f8cf3dff11437d1e054952e" data-dist-info-metadata="sha256=48022884c676a59c85113445ae9e14ad7f149808fb5d62c2660f8c4567489fe5">
airium-0.2.7-py3-none-any.whl
</a>
<br />
</body>
</html>

Binary file not shown.

@@ -0,0 +1,187 @@
Metadata-Version: 2.1
Name: apeye-core
Version: 1.1.5
Summary: Core (offline) functionality for the apeye library.
Project-URL: Homepage, https://github.com/domdfcoding/apeye-core
Project-URL: Issue Tracker, https://github.com/domdfcoding/apeye-core/issues
Project-URL: Source Code, https://github.com/domdfcoding/apeye-core
Author-email: Dominic Davis-Foster <dominic@davis-foster.co.uk>
License: Copyright (c) 2022, Dominic Davis-Foster
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
License-File: LICENSE
Keywords: url
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: BSD License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Typing :: Typed
Requires-Python: >=3.6.1
Requires-Dist: domdf-python-tools>=2.6.0
Requires-Dist: idna>=2.5
Description-Content-Type: text/x-rst
===========
apeye-core
===========
.. start short_desc
**Core (offline) functionality for the apeye library.**
.. end short_desc
.. start shields

.. list-table::
    :stub-columns: 1
    :widths: 10 90

    * - Tests
      - |actions_linux| |actions_windows| |actions_macos| |coveralls|
    * - PyPI
      - |pypi-version| |supported-versions| |supported-implementations| |wheel|
    * - Anaconda
      - |conda-version| |conda-platform|
    * - Activity
      - |commits-latest| |commits-since| |maintained| |pypi-downloads|
    * - QA
      - |codefactor| |actions_flake8| |actions_mypy|
    * - Other
      - |license| |language| |requires|

.. |actions_linux| image:: https://github.com/domdfcoding/apeye-core/workflows/Linux/badge.svg
    :target: https://github.com/domdfcoding/apeye-core/actions?query=workflow%3A%22Linux%22
    :alt: Linux Test Status

.. |actions_windows| image:: https://github.com/domdfcoding/apeye-core/workflows/Windows/badge.svg
    :target: https://github.com/domdfcoding/apeye-core/actions?query=workflow%3A%22Windows%22
    :alt: Windows Test Status

.. |actions_macos| image:: https://github.com/domdfcoding/apeye-core/workflows/macOS/badge.svg
    :target: https://github.com/domdfcoding/apeye-core/actions?query=workflow%3A%22macOS%22
    :alt: macOS Test Status

.. |actions_flake8| image:: https://github.com/domdfcoding/apeye-core/workflows/Flake8/badge.svg
    :target: https://github.com/domdfcoding/apeye-core/actions?query=workflow%3A%22Flake8%22
    :alt: Flake8 Status

.. |actions_mypy| image:: https://github.com/domdfcoding/apeye-core/workflows/mypy/badge.svg
    :target: https://github.com/domdfcoding/apeye-core/actions?query=workflow%3A%22mypy%22
    :alt: mypy status

.. |requires| image:: https://dependency-dash.repo-helper.uk/github/domdfcoding/apeye-core/badge.svg
    :target: https://dependency-dash.repo-helper.uk/github/domdfcoding/apeye-core/
    :alt: Requirements Status

.. |coveralls| image:: https://img.shields.io/coveralls/github/domdfcoding/apeye-core/master?logo=coveralls
    :target: https://coveralls.io/github/domdfcoding/apeye-core?branch=master
    :alt: Coverage

.. |codefactor| image:: https://img.shields.io/codefactor/grade/github/domdfcoding/apeye-core?logo=codefactor
    :target: https://www.codefactor.io/repository/github/domdfcoding/apeye-core
    :alt: CodeFactor Grade

.. |pypi-version| image:: https://img.shields.io/pypi/v/apeye-core
    :target: https://pypi.org/project/apeye-core/
    :alt: PyPI - Package Version

.. |supported-versions| image:: https://img.shields.io/pypi/pyversions/apeye-core?logo=python&logoColor=white
    :target: https://pypi.org/project/apeye-core/
    :alt: PyPI - Supported Python Versions

.. |supported-implementations| image:: https://img.shields.io/pypi/implementation/apeye-core
    :target: https://pypi.org/project/apeye-core/
    :alt: PyPI - Supported Implementations

.. |wheel| image:: https://img.shields.io/pypi/wheel/apeye-core
    :target: https://pypi.org/project/apeye-core/
    :alt: PyPI - Wheel

.. |conda-version| image:: https://img.shields.io/conda/v/conda-forge/apeye-core?logo=anaconda
    :target: https://anaconda.org/conda-forge/apeye-core
    :alt: Conda - Package Version

.. |conda-platform| image:: https://img.shields.io/conda/pn/conda-forge/apeye-core?label=conda%7Cplatform
    :target: https://anaconda.org/conda-forge/apeye-core
    :alt: Conda - Platform

.. |license| image:: https://img.shields.io/github/license/domdfcoding/apeye-core
    :target: https://github.com/domdfcoding/apeye-core/blob/master/LICENSE
    :alt: License

.. |language| image:: https://img.shields.io/github/languages/top/domdfcoding/apeye-core
    :alt: GitHub top language

.. |commits-since| image:: https://img.shields.io/github/commits-since/domdfcoding/apeye-core/v1.1.5
    :target: https://github.com/domdfcoding/apeye-core/pulse
    :alt: GitHub commits since tagged version

.. |commits-latest| image:: https://img.shields.io/github/last-commit/domdfcoding/apeye-core
    :target: https://github.com/domdfcoding/apeye-core/commit/master
    :alt: GitHub last commit

.. |maintained| image:: https://img.shields.io/maintenance/yes/2024
    :alt: Maintenance

.. |pypi-downloads| image:: https://img.shields.io/pypi/dm/apeye-core
    :target: https://pypi.org/project/apeye-core/
    :alt: PyPI - Downloads

.. end shields
Installation
--------------
.. start installation

``apeye-core`` can be installed from PyPI or Anaconda.

To install with ``pip``:

.. code-block:: bash

    $ python -m pip install apeye-core

To install with ``conda``:

.. code-block:: bash

    $ conda install -c conda-forge apeye-core

.. end installation

@@ -0,0 +1,20 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for apeye-core
</title>
</head>
<body>
<h1>
Links for apeye-core
</h1>
<a href="/apeye-core/apeye_core-1.1.5-py3-none-any.whl#sha256=dc27a93f8c9e246b3b238c5ea51edf6115ab2618ef029b9f2d9a190ec8228fbf" data-requires-python="&gt;=3.6.1" data-dist-info-metadata="sha256=751bbcd20a27f156c12183849bc78419fbac8ca5a51c29fb5137e01e6aeb5e78">
apeye_core-1.1.5-py3-none-any.whl
</a>
<br />
</body>
</html>

Binary file not shown.

@@ -0,0 +1,210 @@
Metadata-Version: 2.1
Name: apeye
Version: 1.4.1
Summary: Handy tools for working with URLs and APIs.
Keywords: api,cache,requests,rest,url
Author-email: Dominic Davis-Foster <dominic@davis-foster.co.uk>
Requires-Python: >=3.6.1
Description-Content-Type: text/x-rst
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Internet :: WWW/HTTP
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Typing :: Typed
Requires-Dist: apeye-core>=1.0.0b2
Requires-Dist: domdf-python-tools>=2.6.0
Requires-Dist: platformdirs>=2.3.0
Requires-Dist: requests>=2.24.0
Requires-Dist: cachecontrol[filecache]>=0.12.6 ; extra == "all"
Requires-Dist: lockfile>=0.12.2 ; extra == "all"
Requires-Dist: cachecontrol[filecache]>=0.12.6 ; extra == "limiter"
Requires-Dist: lockfile>=0.12.2 ; extra == "limiter"
Project-URL: Documentation, https://apeye.readthedocs.io/en/latest
Project-URL: Homepage, https://github.com/domdfcoding/apeye
Project-URL: Issue Tracker, https://github.com/domdfcoding/apeye/issues
Project-URL: Source Code, https://github.com/domdfcoding/apeye
Provides-Extra: all
Provides-Extra: limiter
======
apeye
======
.. start short_desc
**Handy tools for working with URLs and APIs.**
.. end short_desc
.. start shields

.. list-table::
    :stub-columns: 1
    :widths: 10 90

    * - Docs
      - |docs| |docs_check|
    * - Tests
      - |actions_linux| |actions_windows| |actions_macos| |coveralls|
    * - PyPI
      - |pypi-version| |supported-versions| |supported-implementations| |wheel|
    * - Anaconda
      - |conda-version| |conda-platform|
    * - Activity
      - |commits-latest| |commits-since| |maintained| |pypi-downloads|
    * - QA
      - |codefactor| |actions_flake8| |actions_mypy|
    * - Other
      - |license| |language| |requires|

.. |docs| image:: https://img.shields.io/readthedocs/apeye/latest?logo=read-the-docs
    :target: https://apeye.readthedocs.io/en/latest
    :alt: Documentation Build Status

.. |docs_check| image:: https://github.com/domdfcoding/apeye/workflows/Docs%20Check/badge.svg
    :target: https://github.com/domdfcoding/apeye/actions?query=workflow%3A%22Docs+Check%22
    :alt: Docs Check Status

.. |actions_linux| image:: https://github.com/domdfcoding/apeye/workflows/Linux/badge.svg
    :target: https://github.com/domdfcoding/apeye/actions?query=workflow%3A%22Linux%22
    :alt: Linux Test Status

.. |actions_windows| image:: https://github.com/domdfcoding/apeye/workflows/Windows/badge.svg
    :target: https://github.com/domdfcoding/apeye/actions?query=workflow%3A%22Windows%22
    :alt: Windows Test Status

.. |actions_macos| image:: https://github.com/domdfcoding/apeye/workflows/macOS/badge.svg
    :target: https://github.com/domdfcoding/apeye/actions?query=workflow%3A%22macOS%22
    :alt: macOS Test Status

.. |actions_flake8| image:: https://github.com/domdfcoding/apeye/workflows/Flake8/badge.svg
    :target: https://github.com/domdfcoding/apeye/actions?query=workflow%3A%22Flake8%22
    :alt: Flake8 Status

.. |actions_mypy| image:: https://github.com/domdfcoding/apeye/workflows/mypy/badge.svg
    :target: https://github.com/domdfcoding/apeye/actions?query=workflow%3A%22mypy%22
    :alt: mypy status

.. |requires| image:: https://dependency-dash.repo-helper.uk/github/domdfcoding/apeye/badge.svg
    :target: https://dependency-dash.repo-helper.uk/github/domdfcoding/apeye/
    :alt: Requirements Status

.. |coveralls| image:: https://img.shields.io/coveralls/github/domdfcoding/apeye/master?logo=coveralls
    :target: https://coveralls.io/github/domdfcoding/apeye?branch=master
    :alt: Coverage

.. |codefactor| image:: https://img.shields.io/codefactor/grade/github/domdfcoding/apeye?logo=codefactor
    :target: https://www.codefactor.io/repository/github/domdfcoding/apeye
    :alt: CodeFactor Grade

.. |pypi-version| image:: https://img.shields.io/pypi/v/apeye
    :target: https://pypi.org/project/apeye/
    :alt: PyPI - Package Version

.. |supported-versions| image:: https://img.shields.io/pypi/pyversions/apeye?logo=python&logoColor=white
    :target: https://pypi.org/project/apeye/
    :alt: PyPI - Supported Python Versions

.. |supported-implementations| image:: https://img.shields.io/pypi/implementation/apeye
    :target: https://pypi.org/project/apeye/
    :alt: PyPI - Supported Implementations

.. |wheel| image:: https://img.shields.io/pypi/wheel/apeye
    :target: https://pypi.org/project/apeye/
    :alt: PyPI - Wheel

.. |conda-version| image:: https://img.shields.io/conda/v/domdfcoding/apeye?logo=anaconda
    :target: https://anaconda.org/domdfcoding/apeye
    :alt: Conda - Package Version

.. |conda-platform| image:: https://img.shields.io/conda/pn/domdfcoding/apeye?label=conda%7Cplatform
    :target: https://anaconda.org/domdfcoding/apeye
    :alt: Conda - Platform

.. |license| image:: https://img.shields.io/github/license/domdfcoding/apeye
    :target: https://github.com/domdfcoding/apeye/blob/master/LICENSE
    :alt: License

.. |language| image:: https://img.shields.io/github/languages/top/domdfcoding/apeye
    :alt: GitHub top language

.. |commits-since| image:: https://img.shields.io/github/commits-since/domdfcoding/apeye/v1.4.1
    :target: https://github.com/domdfcoding/apeye/pulse
    :alt: GitHub commits since tagged version

.. |commits-latest| image:: https://img.shields.io/github/last-commit/domdfcoding/apeye
    :target: https://github.com/domdfcoding/apeye/commit/master
    :alt: GitHub last commit

.. |maintained| image:: https://img.shields.io/maintenance/yes/2023
    :alt: Maintenance

.. |pypi-downloads| image:: https://img.shields.io/pypi/dm/apeye
    :target: https://pypi.org/project/apeye/
    :alt: PyPI - Downloads

.. end shields
``apeye`` provides:
* ``pathlib.Path``\-like objects to represent URLs
* a JSON-backed cache decorator for functions
* a CacheControl_ adapter to limit the rate of requests
See `the documentation`_ for more details.
.. _CacheControl: https://github.com/ionrock/cachecontrol
.. _the documentation: https://apeye.readthedocs.io/en/latest/api/cache.html
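As a quick sketch of those ``pathlib.Path``\-like URL objects (illustrative values; see `the documentation`_ for the full API):

.. code-block:: python

    from apeye.url import URL

    base = URL("https://api.example.com")
    endpoint = base / "v1" / "users"  # pathlib-style joining
    print(endpoint)       # https://api.example.com/v1/users
    print(endpoint.name)  # users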
Installation
--------------
.. start installation

``apeye`` can be installed from PyPI or Anaconda.

To install with ``pip``:

.. code-block:: bash

    $ python -m pip install apeye

To install with ``conda``:

* First add the required channels

  .. code-block:: bash

      $ conda config --add channels https://conda.anaconda.org/conda-forge
      $ conda config --add channels https://conda.anaconda.org/domdfcoding

* Then install

  .. code-block:: bash

      $ conda install apeye

.. end installation

.. attention::

    In v0.9.0 and above the ``rate_limiter`` module requires the ``limiter`` extra to be installed:

    .. code-block:: bash

        $ python -m pip install apeye[limiter]

mirror/apeye/index.html (new file)

@@ -0,0 +1,20 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for apeye
</title>
</head>
<body>
<h1>
Links for apeye
</h1>
<a href="/apeye/apeye-1.4.1-py3-none-any.whl#sha256=44e58a9104ec189bf42e76b3a7fe91e2b2879d96d48e9a77e5e32ff699c9204e" data-requires-python="&gt;=3.6.1" data-dist-info-metadata="sha256=c76bd745f0ea8d7105ed23a0827bf960cd651e8e071dbdeb62946a390ddf86c1">
apeye-1.4.1-py3-none-any.whl
</a>
<br />
</body>
</html>

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: boto3
-Version: 1.41.4
+Version: 1.41.5
 Summary: The AWS SDK for Python
 Home-page: https://github.com/boto/boto3
 Author: Amazon Web Services
@@ -22,7 +22,7 @@ Classifier: Programming Language :: Python :: 3.14
 Requires-Python: >= 3.9
 License-File: LICENSE
 License-File: NOTICE
-Requires-Dist: botocore (<1.42.0,>=1.41.4)
+Requires-Dist: botocore (<1.42.0,>=1.41.5)
 Requires-Dist: jmespath (<2.0.0,>=0.7.1)
 Requires-Dist: s3transfer (<0.16.0,>=0.15.0)
 Provides-Extra: crt

@@ -12,8 +12,8 @@
 <h1>
 Links for boto3
 </h1>
-<a href="/boto3/boto3-1.41.4-py3-none-any.whl#sha256=77d84b7ce890a9b0c6a8993f8de106d8cf8138f332a4685e6de453965e60cb24" data-requires-python="&gt;= 3.9" data-dist-info-metadata="sha256=c263458fb50f5617ae8ff675602ce4b3eedbbaa5c7b7ccf58a72adc50ea35e56">
-boto3-1.41.4-py3-none-any.whl
+<a href="/boto3/boto3-1.41.5-py3-none-any.whl#sha256=bb278111bfb4c33dca8342bda49c9db7685e43debbfa00cc2a5eb854dd54b745" data-requires-python="&gt;= 3.9" data-dist-info-metadata="sha256=329053bf9a9139cc670ba9b8557fe3e7400b57d3137514c9baf0c3209ac04d1f">
+boto3-1.41.5-py3-none-any.whl
 </a>
 <br />
 </body>

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: botocore
-Version: 1.41.4
+Version: 1.41.5
 Summary: Low-level, data-driven core of boto 3.
 Home-page: https://github.com/boto/botocore
 Author: Amazon Web Services

@@ -12,8 +12,8 @@
 <h1>
 Links for botocore
 </h1>
-<a href="/botocore/botocore-1.41.4-py3-none-any.whl#sha256=7143ef845f1d1400dbbf05d999f8c5e8cfaecd6bd84cbfbe5fa0a40e3a9f6353" data-requires-python="&gt;= 3.9" data-dist-info-metadata="sha256=c54e339761f3067ceebafb82873edf4713098abb5ad00b802f347104b1200a04">
-botocore-1.41.4-py3-none-any.whl
+<a href="/botocore/botocore-1.41.5-py3-none-any.whl#sha256=3fef7fcda30c82c27202d232cfdbd6782cb27f20f8e7e21b20606483e66ee73a" data-requires-python="&gt;= 3.9" data-dist-info-metadata="sha256=867c86c9f400df83088bb210e49402344febc90aa6b10d46a0cd02642ae1096c">
+botocore-1.41.5-py3-none-any.whl
 </a>
 <br />
 <a href="/botocore/botocore-1.40.70-py3-none-any.whl#sha256=4a394ad25f5d9f1ef0bed610365744523eeb5c22de6862ab25d8c93f9f6d295c" data-requires-python="&gt;= 3.9" data-dist-info-metadata="sha256=ff124fb918cb0210e04c2c4396cb3ad31bbe26884306bf4d35b9535ece1feb27">

Binary file not shown.

@@ -0,0 +1,78 @@
Metadata-Version: 2.4
Name: certifi
Version: 2025.11.12
Summary: Python package for providing Mozilla's CA Bundle.
Home-page: https://github.com/certifi/python-certifi
Author: Kenneth Reitz
Author-email: me@kennethreitz.com
License: MPL-2.0
Project-URL: Source, https://github.com/certifi/python-certifi
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)
Classifier: Natural Language :: English
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
Requires-Python: >=3.7
License-File: LICENSE
Dynamic: author
Dynamic: author-email
Dynamic: classifier
Dynamic: description
Dynamic: home-page
Dynamic: license
Dynamic: license-file
Dynamic: project-url
Dynamic: requires-python
Dynamic: summary
Certifi: Python SSL Certificates
================================
Certifi provides Mozilla's carefully curated collection of Root Certificates for
validating the trustworthiness of SSL certificates while verifying the identity
of TLS hosts. It has been extracted from the `Requests`_ project.
Installation
------------
``certifi`` is available on PyPI. Simply install it with ``pip``::

    $ pip install certifi

Usage
-----

To reference the installed certificate authority (CA) bundle, you can use the
built-in function::

    >>> import certifi

    >>> certifi.where()
    '/usr/local/lib/python3.7/site-packages/certifi/cacert.pem'

Or from the command line::

    $ python -m certifi
    /usr/local/lib/python3.7/site-packages/certifi/cacert.pem
Enjoy!
.. _`Requests`: https://requests.readthedocs.io/en/master/
Addition/Removal of Certificates
--------------------------------
Certifi does not support any addition/removal or other modification of the
CA trust store content. This project is intended to provide a reliable and
highly portable root of trust to python deployments. Look to upstream projects
for methods to use alternate trust.

mirror/certifi/index.html (new file)

@@ -0,0 +1,20 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for certifi
</title>
</head>
<body>
<h1>
Links for certifi
</h1>
<a href="/certifi/certifi-2025.11.12-py3-none-any.whl#sha256=97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b" data-requires-python="&gt;=3.7" data-dist-info-metadata="sha256=fc9a6b1aeff595649d1e5aee44129ca2b5e7adfbc10e1bd7ffa291afc1d06cb7">
certifi-2025.11.12-py3-none-any.whl
</a>
<br />
</body>
</html>

@@ -0,0 +1,764 @@
Metadata-Version: 2.4
Name: charset-normalizer
Version: 3.4.4
Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.
Author-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
Maintainer-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
License: MIT
Project-URL: Changelog, https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md
Project-URL: Documentation, https://charset-normalizer.readthedocs.io/
Project-URL: Code, https://github.com/jawah/charset_normalizer
Project-URL: Issue tracker, https://github.com/jawah/charset_normalizer/issues
Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Text Processing :: Linguistic
Classifier: Topic :: Utilities
Classifier: Typing :: Typed
Requires-Python: >=3.7
Description-Content-Type: text/markdown
License-File: LICENSE
Provides-Extra: unicode-backport
Dynamic: license-file
<h1 align="center">Charset Detection, for Everyone 👋</h1>
<p align="center">
<sup>The Real First Universal Charset Detector</sup><br>
<a href="https://pypi.org/project/charset-normalizer">
<img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
</a>
<a href="https://pepy.tech/project/charset-normalizer/">
<img alt="Download Count Total" src="https://static.pepy.tech/badge/charset-normalizer/month" />
</a>
<a href="https://bestpractices.coreinfrastructure.org/projects/7297">
<img src="https://bestpractices.coreinfrastructure.org/projects/7297/badge">
</a>
</p>
<p align="center">
<sup><i>Featured Packages</i></sup><br>
<a href="https://github.com/jawah/niquests">
<img alt="Static Badge" src="https://img.shields.io/badge/Niquests-Most_Advanced_HTTP_Client-cyan">
</a>
<a href="https://github.com/jawah/wassima">
<img alt="Static Badge" src="https://img.shields.io/badge/Wassima-Certifi_Replacement-cyan">
</a>
</p>
<p align="center">
<sup><i>In other language (unofficial port - by the community)</i></sup><br>
<a href="https://github.com/nickspring/charset-normalizer-rs">
<img alt="Static Badge" src="https://img.shields.io/badge/Rust-red">
</a>
</p>
> A library that helps you read text from an unknown charset encoding.<br /> Motivated by `chardet`,
> I'm trying to resolve the issue by taking a new approach.
> All IANA character set names for which the Python core library provides codecs are supported.
<p align="center">
>>>>> <a href="https://charsetnormalizerweb.ousret.now.sh" target="_blank">👉 Try Me Online Now, Then Adopt Me 👈 </a> <<<<<
</p>
This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.
| Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |
|--------------------------------------------------|:---------------------------------------------:|:--------------------------------------------------------------------------------------------------:|:-----------------------------------------------:|
| `Fast` | ❌ | ✅ | ✅ |
| `Universal**` | ❌ | ✅ | ❌ |
| `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ |
| `Reliable` **with** distinguishable standards | ✅ | ✅ | ✅ |
| `License` | LGPL-2.1<br>_restrictive_ | MIT | MPL-1.1<br>_restrictive_ |
| `Native Python` | ✅ | ✅ | ❌ |
| `Detect spoken language` | ❌ | ✅ | N/A |
| `UnicodeDecodeError Safety` | ❌ | ✅ | ❌ |
| `Whl Size (min)` | 193.6 kB | 42 kB | ~200 kB |
| `Supported Encoding` | 33 | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |
<p align="center">
<img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>
</p>
*\*\* : They are clearly using specific code for a specific encoding even if covering most of the encodings in use*<br>
## ⚡ Performance
This package offers better performance than its counterpart Chardet. Here are some numbers.
| Package | Accuracy | Mean per file (ms) | File per sec (est) |
|-----------------------------------------------|:--------:|:------------------:|:------------------:|
| [chardet](https://github.com/chardet/chardet) | 86 % | 63 ms | 16 file/sec |
| charset-normalizer | **98 %** | **10 ms** | 100 file/sec |
| Package | 99th percentile | 95th percentile | 50th percentile |
|-----------------------------------------------|:---------------:|:---------------:|:---------------:|
| [chardet](https://github.com/chardet/chardet) | 265 ms | 71 ms | 7 ms |
| charset-normalizer | 100 ms | 50 ms | 5 ms |
_updated as of december 2024 using CPython 3.12_
Chardet's performance on larger files (1MB+) is very poor. Expect a huge difference on large payloads.
> Stats are generated using 400+ files using default parameters. More details on used files, see GHA workflows.
> And yes, these results might change at any time. The dataset can be updated to include more files.
> The actual delays heavily depends on your CPU capabilities. The factors should remain the same.
> Keep in mind that these stats are generous and that Chardet's accuracy versus ours is measured using Chardet's initial capability
> (e.g. supported encodings). Challenge them if you want.
## ✨ Installation
Using pip:
```sh
pip install charset-normalizer -U
```
## 🚀 Basic Usage
### CLI
This package comes with a CLI.
```
usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
                  file [file ...]

The Real First Universal Charset Detector. Discover originating encoding used
on text file. Normalize text to unicode.

positional arguments:
  files                 File(s) to be analysed

optional arguments:
  -h, --help            show this help message and exit
  -v, --verbose         Display complementary information about file if any.
                        Stdout will contain logs about the detection process.
  -a, --with-alternative
                        Output complementary possibilities if any. Top-level
                        JSON WILL be a list.
  -n, --normalize       Permit to normalize input file. If not set, program
                        does not write anything.
  -m, --minimal         Only output the charset detected to STDOUT. Disabling
                        JSON output.
  -r, --replace         Replace file when trying to normalize it instead of
                        creating a new one.
  -f, --force           Replace file without asking if you are sure, use this
                        flag with caution.
  -t THRESHOLD, --threshold THRESHOLD
                        Define a custom maximum amount of chaos allowed in
                        decoded content. 0. <= chaos <= 1.
  --version             Show version information and exit.
```
```bash
normalizer ./data/sample.1.fr.srt
```
or
```bash
python -m charset_normalizer ./data/sample.1.fr.srt
```
🎉 Since version 1.4.0 the CLI produces an easily usable stdout result in JSON format.
```json
{
    "path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
    "encoding": "cp1252",
    "encoding_aliases": [
        "1252",
        "windows_1252"
    ],
    "alternative_encodings": [
        "cp1254",
        "cp1256",
        "cp1258",
        "iso8859_14",
        "iso8859_15",
        "iso8859_16",
        "iso8859_3",
        "iso8859_9",
        "latin_1",
        "mbcs"
    ],
    "language": "French",
    "alphabets": [
        "Basic Latin",
        "Latin-1 Supplement"
    ],
    "has_sig_or_bom": false,
    "chaos": 0.149,
    "coherence": 97.152,
    "unicode_path": null,
    "is_preferred": true
}
```
### Python
*Just print out normalized text*
```python
from charset_normalizer import from_path
results = from_path('./my_subtitle.srt')
print(str(results.best()))
```
*Upgrade your code without effort*
```python
from charset_normalizer import detect
```
The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.
See the docs for advanced usage : [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)
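For instance, a small sketch (the exact confidence value depends on the input):

```python
from charset_normalizer import detect

# chardet-compatible API: returns a dict with the familiar
# 'encoding', 'language' and 'confidence' keys.
result = detect('Bonjour, où êtes-vous ?'.encode('cp1252'))
print(result['encoding'], result['confidence'])
```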
## 😇 Why
When I started using Chardet, I noticed that it did not meet my expectations, so I wanted to propose a
reliable alternative using a completely different method. Also! I never back down from a good challenge!
I **don't care** about the **originating charset** encoding, because **two different tables** can
produce **two identical rendered strings.**
What I want is to get readable text, the best I can.
In a way, **I'm brute forcing text decoding.** How cool is that ? 😎
Don't confuse the **ftfy** package with charset-normalizer or chardet. ftfy's goal is to repair Unicode strings, whereas charset-normalizer's is to convert a raw file in an unknown encoding to Unicode.
## 🍰 How
- Discard all charset encoding tables that could not fit the binary content.
- Measure noise, or the mess once opened (by chunks) with a corresponding charset encoding.
- Extract matches with the lowest mess detected.
- Additionally, we measure coherence / probe for a language.
**Wait a minute**, what is noise/mess and coherence according to **YOU ?**
*Noise :* I opened hundreds of text files, **written by humans**, with the wrong encoding table. **I observed**, then
**I established** some ground rules about **what is obvious** when **it seems like** a mess (aka. defining noise in rendered text).
I know that my interpretation of what is noise is probably incomplete; feel free to contribute in order to
improve or rewrite it.
*Coherence :* For each language on earth, we have computed ranked letter appearance occurrences (the best we can). So I thought
that this intel is worth something here. I use those records against the decoded text to check if I can detect intelligent design.
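As a toy illustration of that approach (NOT the library's actual implementation, just the general shape of "discard what cannot decode, keep the least noisy candidate"):

```python
# Toy sketch only -- charset-normalizer's real scoring is far more elaborate.
CANDIDATES = ["utf_8", "cp1252", "latin_1"]

def mess_ratio(text: str) -> float:
    """Crude noise proxy: fraction of control characters in the decoded text."""
    bad = sum(1 for ch in text if ord(ch) < 32 and ch not in "\r\n\t")
    return bad / max(len(text), 1)

def best_guess(payload: bytes) -> str:
    scored = []
    for codec in CANDIDATES:
        try:
            scored.append((mess_ratio(payload.decode(codec)), codec))
        except UnicodeDecodeError:
            continue  # this table cannot fit the binary content: discard it
    return min(scored)[1]  # lowest mess wins

print(best_guess("déjà vu".encode("cp1252")))  # cp1252
```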
## ⚡ Known limitations
- Language detection is unreliable when the text contains two or more languages sharing identical letters (e.g. HTML with English tags plus Turkish content, both sharing Latin characters).
- Every charset detector heavily depends on sufficient content. In common cases, do not bother running detection on very tiny content.
## ⚠️ About Python EOLs
**If you are running:**
- Python >=2.7,<3.5: Unsupported
- Python 3.5: charset-normalizer < 2.1
- Python 3.6: charset-normalizer < 3.1
- Python 3.7: charset-normalizer < 4.0
Upgrade your Python interpreter as soon as possible.
## 👤 Contributing
Contributions, issues and feature requests are very much welcome.<br />
Feel free to check [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.
## 📝 License
Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).<br />
This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.
Characters frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/)
## 💼 For Enterprise
Professional support for charset-normalizer is available as part of the [Tidelift
Subscription][1]. Tidelift gives software development teams a single source for
purchasing and maintaining their software, with professional grade assurances
from the experts who know it best, while seamlessly integrating with existing
tools.
[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme
[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/7297/badge)](https://www.bestpractices.dev/projects/7297)
# Changelog
All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
## [3.4.4](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.4) (2025-10-13)
### Changed
- Bound `setuptools` to a specific constraint `setuptools>=68,<=81`.
- Raised upper bound of mypyc for the optional pre-built extension to v1.18.2
### Removed
- `setuptools-scm` as a build dependency.
### Misc
- Enforced hashes in `dev-requirements.txt` and created `ci-requirements.txt` for security purposes.
- Additional pre-built wheels for riscv64, s390x, and armv7l architectures.
- Restore `multiple.intoto.jsonl` in GitHub releases in addition to the individual attestation file per wheel.
## [3.4.3](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.3) (2025-08-09)
### Changed
- mypy(c) is no longer a required dependency at build time if `CHARSET_NORMALIZER_USE_MYPYC` isn't set to `1`. (#595) (#583)
- Automatically lower the confidence on small byte samples that are not Unicode in the legacy `detect` function output. (#391)
### Added
- Custom build backend to overcome inability to mark mypy as an optional dependency in the build phase.
- Support for Python 3.14
### Fixed
- The sdist archive contained useless directories.
- Automatically fall back on valid UTF-16 or UTF-32 even if the mess detector (md) says it's noisy. (#633)
### Misc
- SBOMs are automatically published to the relevant GitHub release to comply with regulatory changes.
Each published wheel comes with its SBOM. We chose CycloneDX as the format.
- Prebuilt optimized wheels are no longer distributed by default for CPython 3.7 due to a change in cibuildwheel.
## [3.4.2](https://github.com/Ousret/charset_normalizer/compare/3.4.1...3.4.2) (2025-05-02)
### Fixed
- Addressed the DeprecationWarning in our CLI regarding `argparse.FileType` by backporting the target class into the package. (#591)
- Improved the overall reliability of the detector with CJK Ideographs. (#605) (#587)
### Changed
- Optional mypyc compilation upgraded to version 1.15 for Python >= 3.8
## [3.4.1](https://github.com/Ousret/charset_normalizer/compare/3.4.0...3.4.1) (2024-12-24)
### Changed
- Project metadata is now stored in `pyproject.toml` instead of `setup.cfg`, still using setuptools as the build backend.
- Enforce delayed annotation loading for simpler and more consistent types across the project.
- Optional mypyc compilation upgraded to version 1.14 for Python >= 3.8
### Added
- pre-commit configuration.
- noxfile.
### Removed
- `build-requirements.txt`, as the `pyproject.toml` native build configuration is now used.
- `bin/integration.py` and `bin/serve.py` in favor of downstream integration test (see noxfile).
- `setup.cfg` in favor of `pyproject.toml` metadata configuration.
- Unused `utils.range_scan` function.
### Fixed
- Converting content to Unicode bytes may insert `utf_8` instead of preferred `utf-8`. (#572)
- Deprecation warning "'count' is passed as positional argument" when converting to Unicode bytes on Python 3.13+
## [3.4.0](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) (2024-10-08)
### Added
- Argument `--no-preemptive` in the CLI to prevent the detector from searching for hints.
- Support for Python 3.13 (#512)
### Fixed
- Relaxed the TypeError exception thrown when trying to compare a CharsetMatch with anything other than a CharsetMatch.
- Improved the general reliability of the detector based on user feedback. (#520) (#509) (#498) (#407) (#537)
- Declared charset in content (preemptive detection) not changed when converting to utf-8 bytes. (#381)
## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31)
### Fixed
- Unintentional memory usage regression when using a large payload that matches several encodings (#376)
- Regression on some detection cases showcased in the documentation (#371)
### Added
- Noise (md) probe that identifies malformed Arabic representations due to the presence of letters in isolated form (credit to my wife)
## [3.3.1](https://github.com/Ousret/charset_normalizer/compare/3.3.0...3.3.1) (2023-10-22)
### Changed
- Optional mypyc compilation upgraded to version 1.6.1 for Python >= 3.8
- Improved the general detection reliability based on reports from the community
## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30)
### Added
- Allow executing the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer`
- Support for 9 forgotten encodings that are supported by Python but unlisted in `encodings.aliases` as they have no alias (#323)
### Removed
- (internal) Redundant utils.is_ascii function and unused function is_private_use_only
- (internal) charset_normalizer.assets is moved inside charset_normalizer.constant
### Changed
- (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection
- Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.8
### Fixed
- Unable to properly sort CharsetMatch when both chaos/noise and coherence were close due to an unreachable condition in \_\_lt\_\_ (#350)
## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)
### Changed
- Typehint for function `from_path` no longer enforces `PathLike` as its first argument
- Minor improvement over the global detection reliability
### Added
- Introduced function `is_binary`, which relies on the main capabilities and is optimized to detect binaries
- Propagated the `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp`, allowing deeper control over the detection (default: True)
- Explicit support for Python 3.12
### Fixed
- Edge case detection failure where a file contains a 'very-long' camel-cased word (Issue #289)
## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)
### Added
- Argument `should_rename_legacy` for the legacy function `detect`; any new arguments are now disregarded without errors (PR #262)
### Removed
- Support for Python 3.6 (PR #260)
### Changed
- Optional speedup provided by mypy/c 1.0.1
## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)
### Fixed
- Multi-byte cutter/chunk generator did not always cut correctly (PR #233)
### Changed
- Speedup provided by mypy/c 0.990 on Python >= 3.7
## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)
### Added
- Extended the capability of explain=True when cp_isolation contains at most two entries (min. one): details of the mess-detector results will be logged
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
- `normalizer --version` now specifies whether the current version provides the extra speedup (meaning a mypyc-compiled wheel)
### Changed
- Build with static metadata using 'build' frontend
- Make the language detection stricter
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
### Fixed
- CLI with the --normalize option failed when using full paths for files
- TooManyAccentuatedPlugin induced false positives on the mess detection when too few alpha characters had been fed to it
- Sphinx warnings when generating the documentation
### Removed
- Coherence detector no longer returns 'Simple English'; it returns 'English' instead
- Coherence detector no longer returns 'Classical Chinese'; it returns 'Chinese' instead
- Breaking: Method `first()` and `best()` from CharsetMatch
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (it is unreliable and conflicts with ASCII)
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
- Breaking: Top-level function `normalize`
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
- Support for the backport `unicodedata2`
## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)
### Added
- Extended the capability of explain=True when cp_isolation contains at most two entries (min. one): details of the mess-detector results will be logged
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
### Changed
- Build with static metadata using 'build' frontend
- Make the language detection stricter
### Fixed
- CLI with the --normalize option failed when using full paths for files
- TooManyAccentuatedPlugin induced false positives on the mess detection when too few alpha characters had been fed to it
### Removed
- Coherence detector no longer returns 'Simple English'; it returns 'English' instead
- Coherence detector no longer returns 'Classical Chinese'; it returns 'Chinese' instead
## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)
### Added
- `normalizer --version` now specifies whether the current version provides the extra speedup (meaning a mypyc-compiled wheel)
### Removed
- Breaking: Method `first()` and `best()` from CharsetMatch
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (it is unreliable and conflicts with ASCII)
### Fixed
- Sphinx warnings when generating the documentation
## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)
### Changed
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
### Removed
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
- Breaking: Top-level function `normalize`
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
- Support for the backport `unicodedata2`
## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)
### Deprecated
- Function `normalize` scheduled for removal in 3.0
### Changed
- Removed a useless call to decode in fn is_unprintable (#206)
### Fixed
- Third-party library (i18n xgettext) crashing by not recognizing utf_8 (PEP 263) with an underscore, from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)
## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)
### Added
- Output the Unicode table version when running the CLI with `--version` (PR #194)
### Changed
- Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175)
- Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183)
### Fixed
- Workaround potential bug in cpython with Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space (PR #175)
- CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)
### Removed
- Support for Python 3.5 (PR #192)
### Deprecated
- Use of backport unicodedata from `unicodedata2` as Python is quickly catching up, scheduled for removal in 3.0 (PR #194)
## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)
### Fixed
- ASCII misdetection in rare cases (PR #170)
## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)
### Added
- Explicit support for Python 3.11 (PR #164)
### Changed
- The logging behavior has been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165)
## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)
### Fixed
- Fallback match entries might lead to UnicodeDecodeError for large byte sequences (PR #154)
### Changed
- Skipping the language-detection (CD) on ASCII (PR #155)
## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)
### Changed
- Moderating the logging impact (since 2.0.8) for specific environments (PR #147)
### Fixed
- Wrong logging level applied when setting kwarg `explain` to True (PR #146)
## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)
### Changed
- Improvement over Vietnamese detection (PR #126)
- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)
- Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122)
- call sum() without an intermediary list following PEP 289 recommendations from [@adbar](https://github.com/adbar) (PR #129)
- Code style as refactored by Sourcery-AI (PR #131)
- Minor adjustment to the MD around European words (PR #133)
- Remove and replace SRTs from assets / tests (PR #139)
- Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135)
- Setting kwarg `explain` to True will add provisionally (bounded to function lifespan) a specific stream handler (PR #135)
### Fixed
- Fix large (misleading) sequence giving UnicodeDecodeError (PR #137)
- Avoid using too insignificant chunk (PR #137)
### Added
- Add and expose function `set_logging_handler` to configure a specific StreamHandler from [@nmaynes](https://github.com/nmaynes) (PR #135)
- Add `CHANGELOG.md` entries, format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)
## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)
### Added
- Add support for Kazakh (Cyrillic) language detection (PR #109)
### Changed
- Further improved inferring the language from a given single-byte code page (PR #112)
- Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116)
- Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113)
- Various detection improvements (MD+CD) (PR #117)
### Removed
- Remove redundant logging entry about detected language(s) (PR #115)
### Fixed
- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)
## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)
### Fixed
- Unforeseen regression with the loss of backward compatibility with some older minor versions of Python 3.5.x (PR #100)
- Fix CLI crash when using --minimal output in certain cases (PR #103)
### Changed
- Minor improvement to the detection efficiency (less than 1%) (PR #106 #101)
## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)
### Changed
- The project now complies with flake8, mypy, isort and black to ensure better overall quality (PR #81)
- The BC support with v1.x was improved; the old staticmethods are restored (PR #82)
- The Unicode detection is slightly improved (PR #93)
- Added syntax sugar \_\_bool\_\_ for the CharsetMatches list-container (PR #91)
### Removed
- The project no longer raises a warning on tiny content given for detection; it is simply logged as a warning instead (PR #92)
### Fixed
- In some rare cases, the chunk extractor could cut in the middle of a multi-byte character and mislead the mess detection (PR #95)
- Some rare 'space' characters could trip up the UnprintablePlugin/mess detection (PR #96)
- The MANIFEST.in was not exhaustive (PR #78)
## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)
### Fixed
- The CLI no longer raises an unexpected exception when no encoding has been found (PR #70)
- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)
- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)
- Submatch factoring could be wrong in rare edge cases (PR #72)
- Multiple files given to the CLI were ignored when publishing results to STDOUT. (After the first path) (PR #72)
- Fix line endings from CRLF to LF for certain project files (PR #67)
### Changed
- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)
- Allow fallback on specified encoding if any (PR #71)
## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)
### Changed
- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results, especially for ASCII. (PR #63)
- In accordance with community wishes, the detection will fall back on ASCII or UTF-8 as a last resort. (PR #64)
## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)
### Fixed
- Empty/too-small JSON payload misdetection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59)
### Changed
- Don't inject unicodedata2 into sys.modules from [@akx](https://github.com/akx) (PR #57)
## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)
### Fixed
- Make it work where there isn't a filesystem available by dropping assets/frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)
- Using explain=False permanently disabled the verbose output in the current runtime (PR #47)
- One log entry (language target preemptive) was not shown in logs when using explain=True (PR #47)
- Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52)
### Changed
- Public function normalize's default argument values were not aligned with from_bytes (PR #53)
### Added
- You may now use charset aliases in cp_isolation and cp_exclusion arguments (PR #47)
## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)
### Changed
- 4x to 5x faster than the previous 1.4.0 release. At least 2x faster than Chardet.
- Emphasis has been placed on UTF-8 detection; it should perform near-instantaneously.
- The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible.
- The detection mechanism has been slightly improved; Turkish content is now detected correctly (most of the time).
- The program has been rewritten to improve readability and maintainability (now using static typing).
- utf_7 detection has been reinstated.
### Removed
- This package no longer requires anything when used with Python 3.5 (dropped cached_property)
- Removed support for these languages: Catalan, Esperanto, Kazakh, Basque, Volapük, Azeri, Galician, Nynorsk, Macedonian, and Serbo-Croatian.
- The exception hook on UnicodeDecodeError has been removed.
### Deprecated
- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0
### Fixed
- The CLI output used the relative path of the file(s); it should be absolute.
## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)
### Fixed
- Logger configuration/usage no longer conflict with others (PR #44)
## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)
### Removed
- Using standard logging instead of the loguru package.
- Dropped the nose test framework in favor of the maintained pytest.
- Chose not to use the dragonmapper package to help with gibberish Chinese/CJK text.
- Require cached_property only for Python 3.5 due to a constraint; dropped it for every other interpreter version.
- Stopped supporting UTF-7 that does not contain a SIG.
- Dropped PrettyTable; replaced with pure JSON output in the CLI.
### Fixed
- The BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present, due to the sub-match factoring process.
- Not searching properly for the BOM when trying the utf32/16 parent codec.
### Changed
- Improved the package's final size by compressing frequencies.json.
- Huge improvement on the largest payloads.
### Added
- CLI now produces JSON consumable output.
- Return ASCII if the given sequences fit, given reasonable confidence.
## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)
### Fixed
- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40)
## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)
### Fixed
- Empty given payload for detection may cause an exception if trying to access the `alphabets` property. (PR #39)
## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)
### Fixed
- The legacy detect function should return UTF-8-SIG if sig is present in the payload. (PR #38)
## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)
### Changed
- Amend the previous release to allow prettytable 2.0 (PR #35)
## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)
### Fixed
- Fixed an error when using the package with a Python pre-release interpreter (PR #33)
### Changed
- Dependencies refactoring, constraints revised.
### Added
- Added Python 3.9 and 3.10 to the supported interpreters
MIT License
Copyright (c) 2025 TAHRI Ahmed R.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
View File
@ -0,0 +1,764 @@
Metadata-Version: 2.4
Name: charset-normalizer
Version: 3.4.4
Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.
Author-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
Maintainer-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
License: MIT
Project-URL: Changelog, https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md
Project-URL: Documentation, https://charset-normalizer.readthedocs.io/
Project-URL: Code, https://github.com/jawah/charset_normalizer
Project-URL: Issue tracker, https://github.com/jawah/charset_normalizer/issues
Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Text Processing :: Linguistic
Classifier: Topic :: Utilities
Classifier: Typing :: Typed
Requires-Python: >=3.7
Description-Content-Type: text/markdown
License-File: LICENSE
Provides-Extra: unicode-backport
Dynamic: license-file
<h1 align="center">Charset Detection, for Everyone 👋</h1>
<p align="center">
<sup>The Real First Universal Charset Detector</sup><br>
<a href="https://pypi.org/project/charset-normalizer">
<img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
</a>
<a href="https://pepy.tech/project/charset-normalizer/">
<img alt="Download Count Total" src="https://static.pepy.tech/badge/charset-normalizer/month" />
</a>
<a href="https://bestpractices.coreinfrastructure.org/projects/7297">
<img src="https://bestpractices.coreinfrastructure.org/projects/7297/badge">
</a>
</p>
<p align="center">
<sup><i>Featured Packages</i></sup><br>
<a href="https://github.com/jawah/niquests">
<img alt="Static Badge" src="https://img.shields.io/badge/Niquests-Most_Advanced_HTTP_Client-cyan">
</a>
<a href="https://github.com/jawah/wassima">
<img alt="Static Badge" src="https://img.shields.io/badge/Wassima-Certifi_Replacement-cyan">
</a>
</p>
<p align="center">
<sup><i>In other languages (unofficial ports by the community)</i></sup><br>
<a href="https://github.com/nickspring/charset-normalizer-rs">
<img alt="Static Badge" src="https://img.shields.io/badge/Rust-red">
</a>
</p>
> A library that helps you read text from an unknown charset encoding.<br /> Motivated by `chardet`,
> I'm trying to resolve the issue by taking a new approach.
> All IANA character set names for which the Python core library provides codecs are supported.
<p align="center">
>>>>> <a href="https://charsetnormalizerweb.ousret.now.sh" target="_blank">👉 Try Me Online Now, Then Adopt Me 👈 </a> <<<<<
</p>
This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.
| Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |
|--------------------------------------------------|:---------------------------------------------:|:--------------------------------------------------------------------------------------------------:|:-----------------------------------------------:|
| `Fast` | ❌ | ✅ | ✅ |
| `Universal**` | ❌ | ✅ | ❌ |
| `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ |
| `Reliable` **with** distinguishable standards | ✅ | ✅ | ✅ |
| `License` | LGPL-2.1<br>_restrictive_ | MIT | MPL-1.1<br>_restrictive_ |
| `Native Python` | ✅ | ✅ | ❌ |
| `Detect spoken language` | ❌ | ✅ | N/A |
| `UnicodeDecodeError Safety` | ❌ | ✅ | ❌ |
| `Whl Size (min)` | 193.6 kB | 42 kB | ~200 kB |
| `Supported Encoding` | 33 | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |
<p align="center">
<img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>
</p>
*\*\*: They clearly use specific code for a specific encoding, even if it covers most of the encodings in use*<br>
## ⚡ Performance
This package offers better performance than its counterpart Chardet. Here are some numbers.
| Package | Accuracy | Mean per file (ms) | File per sec (est) |
|-----------------------------------------------|:--------:|:------------------:|:------------------:|
| [chardet](https://github.com/chardet/chardet) | 86 % | 63 ms | 16 file/sec |
| charset-normalizer | **98 %** | **10 ms** | 100 file/sec |
| Package | 99th percentile | 95th percentile | 50th percentile |
|-----------------------------------------------|:---------------:|:---------------:|:---------------:|
| [chardet](https://github.com/chardet/chardet) | 265 ms | 71 ms | 7 ms |
| charset-normalizer | 100 ms | 50 ms | 5 ms |
_updated as of December 2024 using CPython 3.12_
Chardet's performance on larger files (1MB+) is very poor. Expect a huge difference on large payloads.
> Stats are generated using 400+ files with default parameters. For more details on the files used, see the GHA workflows.
> And yes, these results might change at any time. The dataset can be updated to include more files.
> The actual delays heavily depend on your CPU capabilities. The factors should remain the same.
> Keep in mind that the stats are generous and that Chardet's accuracy vs. ours is measured using Chardet's initial capability
> (e.g. supported encodings). Challenge them if you want.
## ✨ Installation
Using pip:
```sh
pip install charset-normalizer -U
```
## 🚀 Basic Usage
### CLI
This package comes with a CLI.
```
usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
file [file ...]
The Real First Universal Charset Detector. Discover originating encoding used
on text file. Normalize text to unicode.
positional arguments:
files File(s) to be analysed
optional arguments:
-h, --help show this help message and exit
-v, --verbose Display complementary information about file if any.
Stdout will contain logs about the detection process.
-a, --with-alternative
Output complementary possibilities if any. Top-level
JSON WILL be a list.
-n, --normalize Permit to normalize input file. If not set, program
does not write anything.
-m, --minimal Only output the charset detected to STDOUT. Disabling
JSON output.
-r, --replace Replace file when trying to normalize it instead of
creating a new one.
-f, --force Replace file without asking if you are sure, use this
flag with caution.
-t THRESHOLD, --threshold THRESHOLD
Define a custom maximum amount of chaos allowed in
decoded content. 0. <= chaos <= 1.
--version Show version information and exit.
```
```bash
normalizer ./data/sample.1.fr.srt
```
or
```bash
python -m charset_normalizer ./data/sample.1.fr.srt
```
🎉 Since version 1.4.0 the CLI produces easily usable stdout results in JSON format.
```json
{
"path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
"encoding": "cp1252",
"encoding_aliases": [
"1252",
"windows_1252"
],
"alternative_encodings": [
"cp1254",
"cp1256",
"cp1258",
"iso8859_14",
"iso8859_15",
"iso8859_16",
"iso8859_3",
"iso8859_9",
"latin_1",
"mbcs"
],
"language": "French",
"alphabets": [
"Basic Latin",
"Latin-1 Supplement"
],
"has_sig_or_bom": false,
"chaos": 0.149,
"coherence": 97.152,
"unicode_path": null,
"is_preferred": true
}
```
### Python
*Just print out normalized text*
```python
from charset_normalizer import from_path

results = from_path('./my_subtitle.srt')  # CharsetMatches, a list-like container

print(str(results.best()))  # best() picks the most plausible match; str() yields the decoded text
```
*Upgrade your code without effort*
```python
from charset_normalizer import detect
```
The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) backward-compatible result possible.
See the docs for advanced usage: [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)
## 😇 Why
When I started using Chardet, I noticed that it did not meet my expectations, and I wanted to propose a
reliable alternative using a completely different method. Also, I never back down from a good challenge!
I **don't care** about the **originating charset** encoding, because **two different tables** can
produce **two identical rendered strings.**
What I want is to get readable text, the best I can.
</p>
*\*\* : They are clearly using specific code for a specific encoding even if covering most of the commonly used ones*<br>
## ⚡ Performance
This package offers better performance than its counterpart, Chardet. Here are some numbers.
| Package | Accuracy | Mean per file (ms) | File per sec (est) |
|-----------------------------------------------|:--------:|:------------------:|:------------------:|
| [chardet](https://github.com/chardet/chardet) | 86 % | 63 ms | 16 file/sec |
| charset-normalizer | **98 %** | **10 ms** | 100 file/sec |
| Package | 99th percentile | 95th percentile | 50th percentile |
|-----------------------------------------------|:---------------:|:---------------:|:---------------:|
| [chardet](https://github.com/chardet/chardet) | 265 ms | 71 ms | 7 ms |
| charset-normalizer | 100 ms | 50 ms | 5 ms |
_updated as of December 2024 using CPython 3.12_
Chardet's performance on larger files (1 MB+) is very poor. Expect a huge difference on large payloads.
> Stats are generated using 400+ files with default parameters. For more details on the files used, see the GHA workflows.
> And yes, these results might change at any time. The dataset can be updated to include more files.
> The actual delays heavily depend on your CPU capabilities. The factors should remain the same.
> Keep in mind that the stats are generous and that Chardet's accuracy vs. ours is measured using Chardet's initial capability
> (e.g. supported encodings). Challenge them if you want.
## ✨ Installation
Using pip:
```sh
pip install charset-normalizer -U
```
## 🚀 Basic Usage
### CLI
This package comes with a CLI.
```
usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
file [file ...]
The Real First Universal Charset Detector. Discover originating encoding used
on text file. Normalize text to unicode.
positional arguments:
files File(s) to be analysed
optional arguments:
-h, --help show this help message and exit
-v, --verbose Display complementary information about file if any.
Stdout will contain logs about the detection process.
-a, --with-alternative
Output complementary possibilities if any. Top-level
JSON WILL be a list.
-n, --normalize Permit to normalize input file. If not set, program
does not write anything.
-m, --minimal Only output the charset detected to STDOUT. Disabling
JSON output.
-r, --replace Replace file when trying to normalize it instead of
creating a new one.
-f, --force Replace file without asking if you are sure, use this
flag with caution.
-t THRESHOLD, --threshold THRESHOLD
Define a custom maximum amount of chaos allowed in
decoded content. 0. <= chaos <= 1.
--version Show version information and exit.
```
```bash
normalizer ./data/sample.1.fr.srt
```
or
```bash
python -m charset_normalizer ./data/sample.1.fr.srt
```
🎉 Since version 1.4.0 the CLI produces an easily usable stdout result in JSON format.
```json
{
"path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
"encoding": "cp1252",
"encoding_aliases": [
"1252",
"windows_1252"
],
"alternative_encodings": [
"cp1254",
"cp1256",
"cp1258",
"iso8859_14",
"iso8859_15",
"iso8859_16",
"iso8859_3",
"iso8859_9",
"latin_1",
"mbcs"
],
"language": "French",
"alphabets": [
"Basic Latin",
"Latin-1 Supplement"
],
"has_sig_or_bom": false,
"chaos": 0.149,
"coherence": 97.152,
"unicode_path": null,
"is_preferred": true
}
```
### Python
*Just print out normalized text*
```python
from charset_normalizer import from_path
results = from_path('./my_subtitle.srt')
print(str(results.best()))
```
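The same pattern applies to raw bytes via `from_bytes` (the sibling of `from_path` and `from_fp` mentioned throughout the changelog). A minimal sketch, assuming an installed charset-normalizer; the sample payload is made up:
```python
from charset_normalizer import from_bytes

# Hypothetical sample: accented French text encoded with a legacy 8-bit code page.
payload = "Où est passé l'été ? Ça alors !".encode("cp1252")

best_guess = from_bytes(payload).best()  # a CharsetMatch, or None if nothing fits
if best_guess is not None:
    print(best_guess.encoding)  # the detected codec name
    print(str(best_guess))      # the decoded, normalized text
```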
*Upgrade your code without effort*
```python
from charset_normalizer import detect
```
The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.
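A minimal sketch of that drop-in usage, assuming the chardet-style result shape (keys `encoding`, `language`, `confidence`); the sample bytes are made up:
```python
from charset_normalizer import detect  # drop-in replacement for chardet.detect

# Hypothetical Cyrillic sample encoded with a legacy code page.
raw = "Привет, мир!".encode("cp1251")

result = detect(raw)
print(result)  # a dict like {'encoding': ..., 'language': ..., 'confidence': ...}
```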
See the docs for advanced usage: [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)
## 😇 Why
When I started using Chardet, I noticed that it did not meet my expectations, and I wanted to propose a
reliable alternative using a completely different method. Also! I never back down from a good challenge!
I **don't care** about the **originating charset** encoding, because **two different tables** can
produce **two identical rendered strings.**
What I want is to get readable text, the best I can.
In a way, **I'm brute-forcing text decoding.** How cool is that? 😎
Don't confuse the **ftfy** package with charset-normalizer or chardet. ftfy's goal is to repair broken Unicode strings, whereas charset-normalizer converts a raw file in an unknown encoding to Unicode.
## 🍰 How
- Discard all charset encoding tables that could not fit the binary content.
- Measure the noise, or mess, once opened (by chunks) with a corresponding charset encoding.
- Extract matches with the lowest mess detected.
- Additionally, we measure coherence / probe for a language.
**Wait a minute**, what are noise/mess and coherence according to **YOU?**
*Noise:* I opened hundreds of text files, **written by humans**, with the wrong encoding table. **I observed**, then
**I established** some ground rules about **what is obvious** when **it seems like** a mess (aka. defining noise in rendered text).
I know that my interpretation of noise is probably incomplete; feel free to contribute in order to
improve or rewrite it.
*Coherence:* For each language on earth, we have computed ranked letter-appearance occurrences (the best we can). I figured
that intel is worth something here, so I use those records against the decoded text to check whether I can detect intelligent design.
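Both measures surface on the returned matches. A minimal sketch, assuming the `chaos` and `coherence` attributes of a match mirror the fields the CLI prints in its JSON output:
```python
from charset_normalizer import from_bytes

# Hypothetical accented sample to give the mess/coherence probes something to work on.
payload = "Cette phrase contient des caractères accentués, voyez-vous ?".encode("cp1252")

match = from_bytes(payload).best()
print(match.encoding, match.language)  # best-fitting codec and probed language
print(match.chaos, match.coherence)    # noise/mess measure and language-fit measure
```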
## ⚡ Known limitations
- Language detection is unreliable when the text contains two or more languages sharing identical letters. (e.g. HTML (English tags) + Turkish content (sharing Latin characters))
- Every charset detector heavily depends on sufficient content. In common cases, do not bother running detection on very tiny content; the sketch below illustrates the effect.
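A quick way to observe the content-size limitation, using nothing beyond the public `from_bytes` API; exact outcomes may vary by version:
```python
from charset_normalizer import from_bytes

tiny = "é".encode("cp1252")                                    # a single, ambiguous byte
longer = ("Ceci est un texte français plutôt banal. " * 20).encode("cp1252")

for label, payload in (("tiny", tiny), ("longer", longer)):
    best_guess = from_bytes(payload).best()
    print(label, best_guess.encoding if best_guess else "no reliable guess")
```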
## ⚠️ About Python EOLs
**If you are running:**
- Python >=2.7,<3.5: Unsupported
- Python 3.5: charset-normalizer < 2.1
- Python 3.6: charset-normalizer < 3.1
- Python 3.7: charset-normalizer < 4.0
Upgrade your Python interpreter as soon as possible.
## 👤 Contributing
Contributions, issues and feature requests are very much welcome.<br />
Feel free to check [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.
## 📝 License
Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).<br />
This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.
Character frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/)
## 💼 For Enterprise
Professional support for charset-normalizer is available as part of the [Tidelift
Subscription][1]. Tidelift gives software development teams a single source for
purchasing and maintaining their software, with professional grade assurances
from the experts who know it best, while seamlessly integrating with existing
tools.
[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme
[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/7297/badge)](https://www.bestpractices.dev/projects/7297)
# Changelog
All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
## [3.4.4](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.4) (2025-10-13)
### Changed
- Bound `setuptools` to a specific constraint `setuptools>=68,<=81`.
- Raised upper bound of mypyc for the optional pre-built extension to v1.18.2
### Removed
- `setuptools-scm` as a build dependency.
### Misc
- Enforced hashes in `dev-requirements.txt` and created `ci-requirements.txt` for security purposes.
- Additional pre-built wheels for riscv64, s390x, and armv7l architectures.
- Restore `multiple.intoto.jsonl` in GitHub releases in addition to the individual attestation file per wheel.
## [3.4.3](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.3) (2025-08-09)
### Changed
- mypy(c) is no longer a required dependency at build time if `CHARSET_NORMALIZER_USE_MYPYC` isn't set to `1`. (#595) (#583)
- automatically lower the confidence on small byte samples that are not Unicode in the legacy `detect` function output. (#391)
### Added
- Custom build backend to overcome inability to mark mypy as an optional dependency in the build phase.
- Support for Python 3.14
### Fixed
- sdist archive contained useless directories.
- automatically fall back on valid UTF-16 or UTF-32 even if the md says it's noisy. (#633)
### Misc
- SBOMs are automatically published to the relevant GitHub release to comply with regulatory changes.
Each published wheel comes with its SBOM. We chose CycloneDX as the format.
- Prebuilt optimized wheels are no longer distributed by default for CPython 3.7 due to a change in cibuildwheel.
## [3.4.2](https://github.com/Ousret/charset_normalizer/compare/3.4.1...3.4.2) (2025-05-02)
### Fixed
- Addressed the DeprecationWarning in our CLI regarding `argparse.FileType` by backporting the target class into the package. (#591)
- Improved the overall reliability of the detector with CJK Ideographs. (#605) (#587)
### Changed
- Optional mypyc compilation upgraded to version 1.15 for Python >= 3.8
## [3.4.1](https://github.com/Ousret/charset_normalizer/compare/3.4.0...3.4.1) (2024-12-24)
### Changed
- Project metadata are now stored using `pyproject.toml` instead of `setup.cfg` using setuptools as the build backend.
- Enforce delayed annotation loading for simpler and more consistent types in the project.
- Optional mypyc compilation upgraded to version 1.14 for Python >= 3.8
### Added
- pre-commit configuration.
- noxfile.
### Removed
- `build-requirements.txt`, as the `pyproject.toml` native build configuration is used instead.
- `bin/integration.py` and `bin/serve.py` in favor of downstream integration test (see noxfile).
- `setup.cfg` in favor of `pyproject.toml` metadata configuration.
- Unused `utils.range_scan` function.
### Fixed
- Converting content to Unicode bytes may insert `utf_8` instead of preferred `utf-8`. (#572)
- Deprecation warning "'count' is passed as positional argument" when converting to Unicode bytes on Python 3.13+
## [3.4.0](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) (2024-10-08)
### Added
- Argument `--no-preemptive` in the CLI to prevent the detector from searching for hints.
- Support for Python 3.13 (#512)
### Fixed
- Relax the TypeError exception thrown when trying to compare a CharsetMatch with anything other than a CharsetMatch.
- Improved the general reliability of the detector based on user feedback. (#520) (#509) (#498) (#407) (#537)
- Declared charset in content (preemptive detection) not changed when converting to utf-8 bytes. (#381)
## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31)
### Fixed
- Unintentional memory usage regression when using large payloads that match several encodings (#376)
- Regression on some detection cases showcased in the documentation (#371)
### Added
- Noise (md) probe that identifies malformed Arabic representations due to the presence of letters in isolated form (credit to my wife)
## [3.3.1](https://github.com/Ousret/charset_normalizer/compare/3.3.0...3.3.1) (2023-10-22)
### Changed
- Optional mypyc compilation upgraded to version 1.6.1 for Python >= 3.8
- Improved the general detection reliability based on reports from the community
## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30)
### Added
- Allow executing the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer`
- Support for 9 forgotten encodings that are supported by Python but unlisted in `encoding.aliases` as they have no alias (#323)
### Removed
- (internal) Redundant utils.is_ascii function and unused function is_private_use_only
- (internal) charset_normalizer.assets is moved inside charset_normalizer.constant
### Changed
- (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection
- Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.8
### Fixed
- Unable to properly sort CharsetMatch when both chaos/noise and coherence were close due to an unreachable condition in \_\_lt\_\_ (#350)
## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)
### Changed
- Typehint for function `from_path` no longer enforces `PathLike` as its first argument
- Minor improvement over the global detection reliability
### Added
- Introduce function `is_binary` that relies on the main capabilities, optimized to detect binaries
- Propagate the `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp`, allowing deeper control over the detection (default True)
- Explicit support for Python 3.12
### Fixed
- Edge case detection failure where a file would contain 'very-long' camel-cased words (Issue #289)
## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)
### Added
- Argument `should_rename_legacy` for the legacy function `detect`; any new, unknown arguments are disregarded without errors (PR #262)
### Removed
- Support for Python 3.6 (PR #260)
### Changed
- Optional speedup provided by mypy/c 1.0.1
## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)
### Fixed
- Multi-bytes cutter/chunk generator did not always cut correctly (PR #233)
### Changed
- Speedup provided by mypy/c 0.990 on Python >= 3.7
## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)
### Added
- Extend the capability of explain=True when cp_isolation contains at most two entries (min. one); will log the details of the Mess-detector results
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
- `normalizer --version` now specifies whether the current version provides an extra speedup (meaning a mypyc-compiled whl)
### Changed
- Build with static metadata using 'build' frontend
- Make the language detection stricter
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
### Fixed
- CLI with opt --normalize failed when using a full path for files
- TooManyAccentuatedPlugin induced false positives on the mess detection when too few alpha characters had been fed to it
- Sphinx warnings when generating the documentation
### Removed
- Coherence detector no longer returns 'Simple English'; it returns 'English' instead
- Coherence detector no longer returns 'Classical Chinese'; it returns 'Chinese' instead
- Breaking: Method `first()` and `best()` from CharsetMatch
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (unreliable/conflicts with ASCII)
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
- Breaking: Top-level function `normalize`
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
- Support for the backport `unicodedata2`
## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)
### Added
- Extend the capability of explain=True when cp_isolation contains at most two entries (min. one); will log the details of the Mess-detector results
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
### Changed
- Build with static metadata using 'build' frontend
- Make the language detection stricter
### Fixed
- CLI with opt --normalize failed when using a full path for files
- TooManyAccentuatedPlugin induced false positives on the mess detection when too few alpha characters had been fed to it
### Removed
- Coherence detector no longer returns 'Simple English'; it returns 'English' instead
- Coherence detector no longer returns 'Classical Chinese'; it returns 'Chinese' instead
## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)
### Added
- `normalizer --version` now specifies whether the current version provides an extra speedup (meaning a mypyc-compiled whl)
### Removed
- Breaking: Method `first()` and `best()` from CharsetMatch
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (unreliable/conflicts with ASCII)
### Fixed
- Sphinx warnings when generating the documentation
## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)
### Changed
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
### Removed
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
- Breaking: Top-level function `normalize`
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
- Support for the backport `unicodedata2`
## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)
### Deprecated
- Function `normalize` scheduled for removal in 3.0
### Changed
- Removed useless call to decode in fn is_unprintable (#206)
### Fixed
- Third-party library (i18n xgettext) crashing because utf_8 (PEP 263) written with an underscore was not recognized, from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)
## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)
### Added
- Output the Unicode table version when running the CLI with `--version` (PR #194)
### Changed
- Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175)
- Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183)
### Fixed
- Workaround potential bug in cpython with Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space (PR #175)
- CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)
### Removed
- Support for Python 3.5 (PR #192)
### Deprecated
- Use of backport unicodedata from `unicodedata2` as Python is quickly catching up, scheduled for removal in 3.0 (PR #194)
## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)
### Fixed
- ASCII miss-detection in rare cases (PR #170)
## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)
### Added
- Explicit support for Python 3.11 (PR #164)
### Changed
- The logging behavior has been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165)
## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)
### Fixed
- Fallback match entries might lead to UnicodeDecodeError for large byte sequences (PR #154)
### Changed
- Skipping the language-detection (CD) on ASCII (PR #155)
## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)
### Changed
- Moderating the logging impact (since 2.0.8) for specific environments (PR #147)
### Fixed
- Wrong logging level applied when setting kwarg `explain` to True (PR #146)
## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)
### Changed
- Improvement over Vietnamese detection (PR #126)
- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)
- Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122)
- call sum() without an intermediary list following PEP 289 recommendations from [@adbar](https://github.com/adbar) (PR #129)
- Code style as refactored by Sourcery-AI (PR #131)
- Minor adjustment on the MD around european words (PR #133)
- Remove and replace SRTs from assets / tests (PR #139)
- Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135)
- Setting kwarg `explain` to True will provisionally add (bounded to the function lifespan) a specific stream handler (PR #135)
### Fixed
- Fix large (misleading) sequence giving UnicodeDecodeError (PR #137)
- Avoid using too insignificant chunks (PR #137)
### Added
- Add and expose function `set_logging_handler` to configure a specific StreamHandler from [@nmaynes](https://github.com/nmaynes) (PR #135)
- Add `CHANGELOG.md` entries, format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)
## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)
### Added
- Add support for Kazakh (Cyrillic) language detection (PR #109)
### Changed
- Further improve inferring the language from a given single-byte code page (PR #112)
- Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116)
- Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113)
- Various detection improvements (MD+CD) (PR #117)
### Removed
- Remove redundant logging entry about detected language(s) (PR #115)
### Fixed
- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)
## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)
### Fixed
- Unforeseen regression with the loss of backward-compatibility with some older minor versions of Python 3.5.x (PR #100)
- Fix CLI crash when using --minimal output in certain cases (PR #103)
### Changed
- Minor improvement to the detection efficiency (less than 1%) (PR #106 #101)
## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)
### Changed
- The project now complies with: flake8, mypy, isort and black to ensure better overall quality (PR #81)
- The BC-support with v1.x was improved; the old staticmethods are restored (PR #82)
- The Unicode detection is slightly improved (PR #93)
- Add syntax sugar \_\_bool\_\_ for results CharsetMatches list-container (PR #91)
### Removed
- The project no longer raises a warning on tiny content given for detection; it is simply logged as a warning instead (PR #92)
### Fixed
- In some rare cases, the chunks extractor could cut in the middle of a multi-byte character and could mislead the mess detection (PR #95)
- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)
- The MANIFEST.in was not exhaustive (PR #78)
## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)
### Fixed
- The CLI no longer raises an unexpected exception when no encoding has been found (PR #70)
- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)
- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)
- Submatch factoring could be wrong in rare edge cases (PR #72)
- Multiple files given to the CLI were ignored when publishing results to STDOUT. (After the first path) (PR #72)
- Fix line endings from CRLF to LF for certain project files (PR #67)
### Changed
- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)
- Allow fallback on specified encoding if any (PR #71)
## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)
### Changed
- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63)
- According to the community wishes, the detection will fall back on ASCII or UTF-8 in a last-resort case. (PR #64)
## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)
### Fixed
- Empty/Too small JSON payload miss-detection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59)
### Changed
- Don't inject unicodedata2 into sys.modules from [@akx](https://github.com/akx) (PR #57)
## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)
### Fixed
- Make it work where there isn't a filesystem available, dropping assets frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)
- Using explain=False permanently disabled the verbose output in the current runtime (PR #47)
- One log entry (language target preemptive) was not shown in logs when using explain=True (PR #47)
- Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52)
### Changed
- Public function normalize default argument values were not aligned with from_bytes (PR #53)
### Added
- You may now use charset aliases in cp_isolation and cp_exclusion arguments (PR #47)
## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)
### Changed
- 4 to 5 times faster than the previous 1.4.0 release. At least 2x faster than Chardet.
- Emphasis has been put on UTF-8 detection; it should perform near-instantaneously.
- The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible.
- The detection mechanism has been slightly improved; Turkish content is now detected correctly (most of the time)
- The program has been rewritten to ease readability and maintainability (now using static typing)
- utf_7 detection has been reinstated.
### Removed
- This package no longer requires anything when used with Python 3.5 (dropped cached_property)
- Removed support for these languages: Catalan, Esperanto, Kazakh, Basque, Volapük, Azeri, Galician, Nynorsk, Macedonian, and Serbo-Croatian.
- The exception hook on UnicodeDecodeError has been removed.
### Deprecated
- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0
### Fixed
- The CLI output used the relative path of the file(s); it should be absolute.
## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)
### Fixed
- Logger configuration/usage no longer conflict with others (PR #44)
## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)
### Removed
- Using standard logging instead of using the package loguru.
- Dropping nose test framework in favor of the maintained pytest.
- Choose to not use dragonmapper package to help with gibberish Chinese/CJK text.
- Require cached_property only for Python 3.5 due to constraint. Dropping for every other interpreter version.
- Stop support for UTF-7 that does not contain a SIG.
- Dropping PrettyTable, replaced with pure JSON output in CLI.
### Fixed
- BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present, due to the sub-match factoring process.
- Not searching properly for the BOM when trying the utf32/16 parent codecs.
### Changed
- Improving the package's final size by compressing frequencies.json.
- Huge improvement on the largest payloads.
### Added
- CLI now produces JSON consumable output.
- Return ASCII if the given sequence fits, with reasonable confidence.
## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)
### Fixed
- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40)
## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)
### Fixed
- An empty payload given for detection could cause an exception when accessing the `alphabets` property. (PR #39)
## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)
### Fixed
- The legacy detect function should return UTF-8-SIG if a SIG is present in the payload. (PR #38)
## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)
### Changed
- Amend the previous release to allow prettytable 2.0 (PR #35)
## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)
### Fixed
- Fix error while using the package with a Python pre-release interpreter (PR #33)
### Changed
- Dependencies refactoring, constraints revised.
### Added
- Add Python 3.9 and 3.10 to the supported interpreters
MIT License
Copyright (c) 2025 TAHRI Ahmed R.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -0,0 +1,764 @@
Metadata-Version: 2.4
Name: charset-normalizer
Version: 3.4.4
Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.
Author-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
Maintainer-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
License: MIT
Project-URL: Changelog, https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md
Project-URL: Documentation, https://charset-normalizer.readthedocs.io/
Project-URL: Code, https://github.com/jawah/charset_normalizer
Project-URL: Issue tracker, https://github.com/jawah/charset_normalizer/issues
Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Text Processing :: Linguistic
Classifier: Topic :: Utilities
Classifier: Typing :: Typed
Requires-Python: >=3.7
Description-Content-Type: text/markdown
License-File: LICENSE
Provides-Extra: unicode-backport
Dynamic: license-file
<h1 align="center">Charset Detection, for Everyone 👋</h1>
<p align="center">
<sup>The Real First Universal Charset Detector</sup><br>
<a href="https://pypi.org/project/charset-normalizer">
<img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
</a>
<a href="https://pepy.tech/project/charset-normalizer/">
<img alt="Download Count Total" src="https://static.pepy.tech/badge/charset-normalizer/month" />
</a>
<a href="https://bestpractices.coreinfrastructure.org/projects/7297">
<img src="https://bestpractices.coreinfrastructure.org/projects/7297/badge">
</a>
</p>
<p align="center">
<sup><i>Featured Packages</i></sup><br>
<a href="https://github.com/jawah/niquests">
<img alt="Static Badge" src="https://img.shields.io/badge/Niquests-Most_Advanced_HTTP_Client-cyan">
</a>
<a href="https://github.com/jawah/wassima">
<img alt="Static Badge" src="https://img.shields.io/badge/Wassima-Certifi_Replacement-cyan">
</a>
</p>
<p align="center">
<sup><i>In other language (unofficial port - by the community)</i></sup><br>
<a href="https://github.com/nickspring/charset-normalizer-rs">
<img alt="Static Badge" src="https://img.shields.io/badge/Rust-red">
</a>
</p>
> A library that helps you read text from an unknown charset encoding.<br /> Motivated by `chardet`,
> I'm trying to resolve the issue by taking a new approach.
> All IANA character set names for which the Python core library provides codecs are supported.
<p align="center">
>>>>> <a href="https://charsetnormalizerweb.ousret.now.sh" target="_blank">👉 Try Me Online Now, Then Adopt Me 👈 </a> <<<<<
</p>
This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.
| Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |
|--------------------------------------------------|:---------------------------------------------:|:--------------------------------------------------------------------------------------------------:|:-----------------------------------------------:|
| `Fast` | ❌ | ✅ | ✅ |
| `Universal**` | ❌ | ✅ | ❌ |
| `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ |
| `Reliable` **with** distinguishable standards | ✅ | ✅ | ✅ |
| `License` | LGPL-2.1<br>_restrictive_ | MIT | MPL-1.1<br>_restrictive_ |
| `Native Python` | ✅ | ✅ | ❌ |
| `Detect spoken language` | ❌ | ✅ | N/A |
| `UnicodeDecodeError Safety` | ❌ | ✅ | ❌ |
| `Whl Size (min)` | 193.6 kB | 42 kB | ~200 kB |
| `Supported Encoding` | 33 | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |
<p align="center">
<img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>
</p>
*\*\* : They are clearly using specific code for a specific encoding even if covering most of used one*<br>
## ⚡ Performance
This package offer better performance than its counterpart Chardet. Here are some numbers.
| Package | Accuracy | Mean per file (ms) | File per sec (est) |
|-----------------------------------------------|:--------:|:------------------:|:------------------:|
| [chardet](https://github.com/chardet/chardet) | 86 % | 63 ms | 16 file/sec |
| charset-normalizer | **98 %** | **10 ms** | 100 file/sec |
| Package | 99th percentile | 95th percentile | 50th percentile |
|-----------------------------------------------|:---------------:|:---------------:|:---------------:|
| [chardet](https://github.com/chardet/chardet) | 265 ms | 71 ms | 7 ms |
| charset-normalizer | 100 ms | 50 ms | 5 ms |
_updated as of december 2024 using CPython 3.12_
Chardet's performance on larger file (1MB+) are very poor. Expect huge difference on large payload.
> Stats are generated using 400+ files using default parameters. More details on used files, see GHA workflows.
> And yes, these results might change at any time. The dataset can be updated to include more files.
> The actual delays heavily depends on your CPU capabilities. The factors should remain the same.
> Keep in mind that the stats are generous and that Chardet accuracy vs our is measured using Chardet initial capability
> (e.g. Supported Encoding) Challenge-them if you want.
## ✨ Installation
Using pip:
```sh
pip install charset-normalizer -U
```
## 🚀 Basic Usage
### CLI
This package comes with a CLI.
```
usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
file [file ...]
The Real First Universal Charset Detector. Discover originating encoding used
on text file. Normalize text to unicode.
positional arguments:
files File(s) to be analysed
optional arguments:
-h, --help show this help message and exit
-v, --verbose Display complementary information about file if any.
Stdout will contain logs about the detection process.
-a, --with-alternative
Output complementary possibilities if any. Top-level
JSON WILL be a list.
-n, --normalize Permit to normalize input file. If not set, program
does not write anything.
-m, --minimal Only output the charset detected to STDOUT. Disabling
JSON output.
-r, --replace Replace file when trying to normalize it instead of
creating a new one.
-f, --force Replace file without asking if you are sure, use this
flag with caution.
-t THRESHOLD, --threshold THRESHOLD
Define a custom maximum amount of chaos allowed in
decoded content. 0. <= chaos <= 1.
--version Show version information and exit.
```
```bash
normalizer ./data/sample.1.fr.srt
```
or
```bash
python -m charset_normalizer ./data/sample.1.fr.srt
```
🎉 Since version 1.4.0 the CLI produce easily usable stdout result in JSON format.
```json
{
"path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
"encoding": "cp1252",
"encoding_aliases": [
"1252",
"windows_1252"
],
"alternative_encodings": [
"cp1254",
"cp1256",
"cp1258",
"iso8859_14",
"iso8859_15",
"iso8859_16",
"iso8859_3",
"iso8859_9",
"latin_1",
"mbcs"
],
"language": "French",
"alphabets": [
"Basic Latin",
"Latin-1 Supplement"
],
"has_sig_or_bom": false,
"chaos": 0.149,
"coherence": 97.152,
"unicode_path": null,
"is_preferred": true
}
```
### Python
*Just print out normalized text*
```python
from charset_normalizer import from_path
results = from_path('./my_subtitle.srt')
print(str(results.best()))
```
*Upgrade your code without effort*
```python
from charset_normalizer import detect
```
The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.
See the docs for advanced usage : [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)
## 😇 Why
When I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a
reliable alternative using a completely different method. Also! I never back down on a good challenge!
I **don't care** about the **originating charset** encoding, because **two different tables** can
produce **two identical rendered string.**
What I want is to get readable text, the best I can.
In a way, **I'm brute forcing text decoding.** How cool is that ? 😎
Don't confuse package **ftfy** with charset-normalizer or chardet. ftfy goal is to repair Unicode string whereas charset-normalizer to convert raw file in unknown encoding to unicode.
## 🍰 How
- Discard all charset encoding table that could not fit the binary content.
- Measure noise, or the mess once opened (by chunks) with a corresponding charset encoding.
- Extract matches with the lowest mess detected.
- Additionally, we measure coherence / probe for a language.
**Wait a minute**, what is noise/mess and coherence according to **YOU ?**
*Noise :* I opened hundred of text files, **written by humans**, with the wrong encoding table. **I observed**, then
**I established** some ground rules about **what is obvious** when **it seems like** a mess (aka. defining noise in rendered text).
I know that my interpretation of what is noise is probably incomplete, feel free to contribute in order to
improve or rewrite it.
*Coherence :* For each language there is on earth, we have computed ranked letter appearance occurrences (the best we can). So I thought
that intel is worth something here. So I use those records against decoded text to check if I can detect intelligent design.
## ⚡ Known limitations
- Language detection is unreliable when text contains two or more languages sharing identical letters. (eg. HTML (english tags) + Turkish content (Sharing Latin characters))
- Every charset detector heavily depends on sufficient content. In common cases, do not bother run detection on very tiny content.
## ⚠️ About Python EOLs
**If you are running:**
- Python >=2.7,<3.5: Unsupported
- Python 3.5: charset-normalizer < 2.1
- Python 3.6: charset-normalizer < 3.1
- Python 3.7: charset-normalizer < 4.0
Upgrade your Python interpreter as soon as possible.
## 👤 Contributing
Contributions, issues and feature requests are very much welcome.<br />
Feel free to check [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.
## 📝 License
Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).<br />
This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.
Characters frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/)
## 💼 For Enterprise
Professional support for charset-normalizer is available as part of the [Tidelift
Subscription][1]. Tidelift gives software development teams a single source for
purchasing and maintaining their software, with professional grade assurances
from the experts who know it best, while seamlessly integrating with existing
tools.
[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme
[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/7297/badge)](https://www.bestpractices.dev/projects/7297)
# Changelog
All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
## [3.4.4](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.4) (2025-10-13)
### Changed
- Bound `setuptools` to a specific constraint `setuptools>=68,<=81`.
- Raised upper bound of mypyc for the optional pre-built extension to v1.18.2
### Removed
- `setuptools-scm` as a build dependency.
### Misc
- Enforced hashes in `dev-requirements.txt` and created `ci-requirements.txt` for security purposes.
- Additional pre-built wheels for riscv64, s390x, and armv7l architectures.
- Restore ` multiple.intoto.jsonl` in GitHub releases in addition to individual attestation file per wheel.
## [3.4.3](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.3) (2025-08-09)
### Changed
- mypy(c) is no longer a required dependency at build time if `CHARSET_NORMALIZER_USE_MYPYC` isn't set to `1`. (#595) (#583)
- automatically lower confidence on small bytes samples that are not Unicode in `detect` output legacy function. (#391)
### Added
- Custom build backend to overcome inability to mark mypy as an optional dependency in the build phase.
- Support for Python 3.14
### Fixed
- sdist archive contained useless directories.
- automatically fallback on valid UTF-16 or UTF-32 even if the md says it's noisy. (#633)
### Misc
- SBOM are automatically published to the relevant GitHub release to comply with regulatory changes.
Each published wheel comes with its SBOM. We choose CycloneDX as the format.
- Prebuilt optimized wheel are no longer distributed by default for CPython 3.7 due to a change in cibuildwheel.
## [3.4.2](https://github.com/Ousret/charset_normalizer/compare/3.4.1...3.4.2) (2025-05-02)
### Fixed
- Addressed the DeprecationWarning in our CLI regarding `argparse.FileType` by backporting the target class into the package. (#591)
- Improved the overall reliability of the detector with CJK Ideographs. (#605) (#587)
### Changed
- Optional mypyc compilation upgraded to version 1.15 for Python >= 3.8
## [3.4.1](https://github.com/Ousret/charset_normalizer/compare/3.4.0...3.4.1) (2024-12-24)
### Changed
- Project metadata are now stored using `pyproject.toml` instead of `setup.cfg` using setuptools as the build backend.
- Enforce annotation delayed loading for a simpler and consistent types in the project.
- Optional mypyc compilation upgraded to version 1.14 for Python >= 3.8
### Added
- pre-commit configuration.
- noxfile.
### Removed
- `build-requirements.txt` as per using `pyproject.toml` native build configuration.
- `bin/integration.py` and `bin/serve.py` in favor of downstream integration test (see noxfile).
- `setup.cfg` in favor of `pyproject.toml` metadata configuration.
- Unused `utils.range_scan` function.
### Fixed
- Converting content to Unicode bytes may insert `utf_8` instead of preferred `utf-8`. (#572)
- Deprecation warning "'count' is passed as positional argument" when converting to Unicode bytes on Python 3.13+
## [3.4.0](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) (2024-10-08)
### Added
- Argument `--no-preemptive` in the CLI to prevent the detector to search for hints.
- Support for Python 3.13 (#512)
### Fixed
- Relax the TypeError exception thrown when trying to compare a CharsetMatch with anything else than a CharsetMatch.
- Improved the general reliability of the detector based on user feedbacks. (#520) (#509) (#498) (#407) (#537)
- Declared charset in content (preemptive detection) not changed when converting to utf-8 bytes. (#381)
## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31)
### Fixed
- Unintentional memory usage regression when using large payload that match several encoding (#376)
- Regression on some detection case showcased in the documentation (#371)
### Added
- Noise (md) probe that identify malformed arabic representation due to the presence of letters in isolated form (credit to my wife)
## [3.3.1](https://github.com/Ousret/charset_normalizer/compare/3.3.0...3.3.1) (2023-10-22)
### Changed
- Optional mypyc compilation upgraded to version 1.6.1 for Python >= 3.8
- Improved the general detection reliability based on reports from the community
## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30)
### Added
- Allow to execute the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer`
- Support for 9 forgotten encoding that are supported by Python but unlisted in `encoding.aliases` as they have no alias (#323)
### Removed
- (internal) Redundant utils.is_ascii function and unused function is_private_use_only
- (internal) charset_normalizer.assets is moved inside charset_normalizer.constant
### Changed
- (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection
- Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.8
### Fixed
- Unable to properly sort CharsetMatch when both chaos/noise and coherence were close due to an unreachable condition in \_\_lt\_\_ (#350)
## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)
### Changed
- Typehint for function `from_path` no longer enforce `PathLike` as its first argument
- Minor improvement over the global detection reliability
### Added
- Introduce function `is_binary` that relies on main capabilities, and optimized to detect binaries
- Propagate `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp` that allow a deeper control over the detection (default True)
- Explicit support for Python 3.12
### Fixed
- Edge case detection failure where a file would contain 'very-long' camel cased word (Issue #289)
## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)
### Added
- Argument `should_rename_legacy` for legacy function `detect` and disregard any new arguments without errors (PR #262)
### Removed
- Support for Python 3.6 (PR #260)
### Changed
- Optional speedup provided by mypy/c 1.0.1
## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)
### Fixed
- Multi-bytes cutter/chunk generator did not always cut correctly (PR #233)
### Changed
- Speedup provided by mypy/c 0.990 on Python >= 3.7
## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)
### Added
- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)
### Changed
- Build with static metadata using 'build' frontend
- Make the language detection stricter
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
### Fixed
- CLI with opt --normalize fail when using full path for files
- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it
- Sphinx warnings when generating the documentation
### Removed
- Coherence detector no longer return 'Simple English' instead return 'English'
- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'
- Breaking: Method `first()` and `best()` from CharsetMatch
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII)
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
- Breaking: Top-level function `normalize`
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
- Support for the backport `unicodedata2`
## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)
### Added
- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
### Changed
- Build with static metadata using 'build' frontend
- Make the language detection stricter
### Fixed
- CLI with opt --normalize fail when using full path for files
- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it
### Removed
- Coherence detector no longer return 'Simple English' instead return 'English'
- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'
## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)
### Added
- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)
### Removed
- Breaking: Method `first()` and `best()` from CharsetMatch
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII)
### Fixed
- Sphinx warnings when generating the documentation
## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)
### Changed
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
### Removed
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
- Breaking: Top-level function `normalize`
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
- Support for the backport `unicodedata2`
## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)
### Deprecated
- Function `normalize` scheduled for removal in 3.0
### Changed
- Removed useless call to decode in fn is_unprintable (#206)
### Fixed
- Third-party library (i18n xgettext) crashing not recognizing utf_8 (PEP 263) with underscore from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)
## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)
### Added
- Output the Unicode table version when running the CLI with `--version` (PR #194)
### Changed
- Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175)
- Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183)
### Fixed
- Workaround potential bug in cpython with Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space (PR #175)
- CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)
### Removed
- Support for Python 3.5 (PR #192)
### Deprecated
- Use of backport unicodedata from `unicodedata2` as Python is quickly catching up, scheduled for removal in 3.0 (PR #194)
## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)
### Fixed
- ASCII miss-detection on rare cases (PR #170)
## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)
### Added
- Explicit support for Python 3.11 (PR #164)
### Changed
- The logging behavior have been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165)
## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)
### Fixed
- Fallback match entries might lead to UnicodeDecodeError for large bytes sequence (PR #154)
### Changed
- Skipping the language-detection (CD) on ASCII (PR #155)
## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)
### Changed
- Moderating the logging impact (since 2.0.8) for specific environments (PR #147)
### Fixed
- Wrong logging level applied when setting kwarg `explain` to True (PR #146)
## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)
### Changed
- Improvement over Vietnamese detection (PR #126)
- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)
- Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122)
- call sum() without an intermediary list following PEP 289 recommendations from [@adbar](https://github.com/adbar) (PR #129)
- Code style as refactored by Sourcery-AI (PR #131)
- Minor adjustment on the MD around european words (PR #133)
- Remove and replace SRTs from assets / tests (PR #139)
- Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135)
- Setting kwarg `explain` to True will provisionally add (bound to the function's lifespan) a specific stream handler (PR #135)
### Fixed
- Fix large (misleading) sequence giving UnicodeDecodeError (PR #137)
- Avoid using insignificantly small chunks (PR #137)
### Added
- Add and expose function `set_logging_handler` to configure a specific StreamHandler from [@nmaynes](https://github.com/nmaynes) (PR #135); a usage sketch follows this list
- Add `CHANGELOG.md` entries, format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)
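A minimal sketch of the new helper, assuming the keyword arguments exposed since this release (the exact defaults are an assumption):
```python
import logging

from charset_normalizer import from_bytes, set_logging_handler

# Attach a StreamHandler to the library logger and lower it to DEBUG so
# the detection steps become visible.
set_logging_handler(level=logging.DEBUG)
from_bytes("très intéressant".encode("cp1252"))
```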
## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)
### Added
- Add support for Kazakh (Cyrillic) language detection (PR #109)
### Changed
- Further improved inferring the language from a given single-byte code page (PR #112)
- Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116)
- Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113)
- Various detection improvement (MD+CD) (PR #117)
### Removed
- Remove redundant logging entry about detected language(s) (PR #115)
### Fixed
- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)
## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)
### Fixed
- Unforeseen regression with the loss of backward compatibility with some older minor releases of Python 3.5.x (PR #100)
- Fix CLI crash when using --minimal output in certain cases (PR #103)
### Changed
- Minor improvement to the detection efficiency (less than 1%) (PR #106 #101)
## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)
### Changed
- The project now complies with flake8, mypy, isort and black to ensure a better overall quality (PR #81)
- The BC support with v1.x was improved; the old staticmethods are restored (PR #82)
- The Unicode detection is slightly improved (PR #93)
- Add syntactic sugar \_\_bool\_\_ for the CharsetMatches results list-container (PR #91)
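A quick hedged sketch of that syntactic sugar (the payload is arbitrary):
```python
from charset_normalizer import from_bytes

results = from_bytes("déjà vu".encode("cp1252"))
if results:  # __bool__: truthy when at least one match was found
    print(results.best().encoding)
```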
### Removed
- The project no longer raises a warning on tiny content given for detection; it is simply logged as a warning instead (PR #92)
### Fixed
- In some rare cases, the chunk extractor could cut in the middle of a multi-byte character and mislead the mess detection (PR #95)
- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)
- The MANIFEST.in was not exhaustive (PR #78)
## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)
### Fixed
- The CLI no longer raises an unexpected exception when no encoding has been found (PR #70)
- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)
- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)
- Submatch factoring could be wrong in rare edge cases (PR #72)
- Multiple files given to the CLI were ignored when publishing results to STDOUT. (After the first path) (PR #72)
- Fix line endings from CRLF to LF for certain project files (PR #67)
### Changed
- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)
- Allow fallback on specified encoding if any (PR #71)
## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)
### Changed
- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63)
- According to the community wishes, the detection will fall back on ASCII or UTF-8 in a last-resort case. (PR #64)
## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)
### Fixed
- Empty or too-small JSON payload misdetection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59)
### Changed
- Don't inject unicodedata2 into sys.modules from [@akx](https://github.com/akx) (PR #57)
## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)
### Fixed
- Make it work where there isn't a filesystem available, dropping assets frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)
- Using explain=False permanently disabled the verbose output in the current runtime (PR #47)
- One log entry (language target preemptive) was not shown in logs when using explain=True (PR #47)
- Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52)
### Changed
- The default argument values of the public function normalize were not aligned with from_bytes (PR #53)
### Added
- You may now use charset aliases in cp_isolation and cp_exclusion arguments (PR #47)
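A small sketch of the alias support (values are illustrative; 'latin-1' is assumed here as an alias of 'iso8859-1'):
```python
from charset_normalizer import from_bytes

# cp_isolation restricts the detector to the listed code pages; since this
# release their aliases are accepted as well.
results = from_bytes("héllo wörld".encode("latin-1"), cp_isolation=["latin-1", "utf-8"])
print(results.best().encoding)
```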
## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)
### Changed
- 4 to 5 times faster than the previous 1.4.0 release. At least 2x faster than Chardet.
- Emphasis has been placed on UTF-8 detection; it should perform nearly instantaneously.
- The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible.
- The detection mechanism has been slightly improved; Turkish content is now detected correctly (most of the time).
- The program has been rewritten to ease readability and maintainability (now using static typing).
- utf_7 detection has been reinstated.
### Removed
- This package no longer requires anything when used with Python 3.5 (dropped cached_property)
- Removed support for these languages: Catalan, Esperanto, Kazakh, Basque, Volapük, Azeri, Galician, Nynorsk, Macedonian, and Serbo-Croatian.
- The exception hook on UnicodeDecodeError has been removed.
### Deprecated
- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0
### Fixed
- The CLI output used the relative path of the file(s); it should be absolute.
## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)
### Fixed
- Logger configuration/usage no longer conflict with others (PR #44)
## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)
### Removed
- Dropped the package loguru in favor of standard logging.
- Dropped the nose test framework in favor of the maintained pytest.
- Chose not to use the dragonmapper package to help with gibberish Chinese/CJK text.
- cached_property is now required only for Python 3.5 due to a constraint; dropped for every other interpreter version.
- Stopped supporting UTF-7 payloads that do not contain a SIG.
- Dropped PrettyTable, replaced with pure JSON output in the CLI.
### Fixed
- The BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present, due to the sub-match factoring process.
- The BOM was not searched for properly when trying the utf32/16 parent codec.
### Changed
- Improved the package's final size by compressing frequencies.json.
- Huge improvement on the largest payloads.
### Added
- The CLI now produces JSON-consumable output.
- Return ASCII if the given sequence fits, with reasonable confidence.
## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)
### Fixed
- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40)
## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)
### Fixed
- An empty payload given for detection could cause an exception when accessing the `alphabets` property. (PR #39)
## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)
### Fixed
- The legacy detect function should return UTF-8-SIG if sig is present in the payload. (PR #38)
## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)
### Changed
- Amend the previous release to allow prettytable 2.0 (PR #35)
## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)
### Fixed
- Fix error while using the package with a python pre-release interpreter (PR #33)
### Changed
- Dependencies refactoring, constraints revised.
### Added
- Add Python 3.9 and 3.10 to the supported interpreters
MIT License
Copyright (c) 2025 TAHRI Ahmed R.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
View File
@ -0,0 +1,763 @@
Metadata-Version: 2.1
Name: charset-normalizer
Version: 3.4.4
Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.
Author-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
Maintainer-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
License: MIT
Project-URL: Changelog, https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md
Project-URL: Documentation, https://charset-normalizer.readthedocs.io/
Project-URL: Code, https://github.com/jawah/charset_normalizer
Project-URL: Issue tracker, https://github.com/jawah/charset_normalizer/issues
Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Text Processing :: Linguistic
Classifier: Topic :: Utilities
Classifier: Typing :: Typed
Requires-Python: >=3.7
Description-Content-Type: text/markdown
License-File: LICENSE
Provides-Extra: unicode_backport
<h1 align="center">Charset Detection, for Everyone 👋</h1>
<p align="center">
<sup>The Real First Universal Charset Detector</sup><br>
<a href="https://pypi.org/project/charset-normalizer">
<img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
</a>
<a href="https://pepy.tech/project/charset-normalizer/">
<img alt="Download Count Total" src="https://static.pepy.tech/badge/charset-normalizer/month" />
</a>
<a href="https://bestpractices.coreinfrastructure.org/projects/7297">
<img src="https://bestpractices.coreinfrastructure.org/projects/7297/badge">
</a>
</p>
<p align="center">
<sup><i>Featured Packages</i></sup><br>
<a href="https://github.com/jawah/niquests">
<img alt="Static Badge" src="https://img.shields.io/badge/Niquests-Most_Advanced_HTTP_Client-cyan">
</a>
<a href="https://github.com/jawah/wassima">
<img alt="Static Badge" src="https://img.shields.io/badge/Wassima-Certifi_Replacement-cyan">
</a>
</p>
<p align="center">
<sup><i>In other language (unofficial port - by the community)</i></sup><br>
<a href="https://github.com/nickspring/charset-normalizer-rs">
<img alt="Static Badge" src="https://img.shields.io/badge/Rust-red">
</a>
</p>
> A library that helps you read text from an unknown charset encoding.<br /> Motivated by `chardet`,
> I'm trying to resolve the issue by taking a new approach.
> All IANA character set names for which the Python core library provides codecs are supported.
<p align="center">
>>>>> <a href="https://charsetnormalizerweb.ousret.now.sh" target="_blank">👉 Try Me Online Now, Then Adopt Me 👈 </a> <<<<<
</p>
This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.
| Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |
|--------------------------------------------------|:---------------------------------------------:|:--------------------------------------------------------------------------------------------------:|:-----------------------------------------------:|
| `Fast` | ❌ | ✅ | ✅ |
| `Universal**` | ❌ | ✅ | ❌ |
| `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ |
| `Reliable` **with** distinguishable standards | ✅ | ✅ | ✅ |
| `License` | LGPL-2.1<br>_restrictive_ | MIT | MPL-1.1<br>_restrictive_ |
| `Native Python` | ✅ | ✅ | ❌ |
| `Detect spoken language` | ❌ | ✅ | N/A |
| `UnicodeDecodeError Safety` | ❌ | ✅ | ❌ |
| `Whl Size (min)` | 193.6 kB | 42 kB | ~200 kB |
| `Supported Encoding` | 33 | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |
<p align="center">
<img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>
</p>
*\*\*: They clearly use specific code for a specific encoding, even if it covers most of the encodings in use.*<br>
## ⚡ Performance
This package offers better performance than its counterpart Chardet. Here are some numbers.
| Package | Accuracy | Mean per file (ms) | File per sec (est) |
|-----------------------------------------------|:--------:|:------------------:|:------------------:|
| [chardet](https://github.com/chardet/chardet) | 86 % | 63 ms | 16 file/sec |
| charset-normalizer | **98 %** | **10 ms** | 100 file/sec |
| Package | 99th percentile | 95th percentile | 50th percentile |
|-----------------------------------------------|:---------------:|:---------------:|:---------------:|
| [chardet](https://github.com/chardet/chardet) | 265 ms | 71 ms | 7 ms |
| charset-normalizer | 100 ms | 50 ms | 5 ms |
_updated as of December 2024 using CPython 3.12_
Chardet's performance on larger files (1MB+) is very poor. Expect a huge difference on large payloads.
> Stats are generated using 400+ files using default parameters. More details on used files, see GHA workflows.
> And yes, these results might change at any time. The dataset can be updated to include more files.
> The actual delays heavily depend on your CPU capabilities. The factors should remain the same.
> Keep in mind that the stats are generous and that Chardet's accuracy vs ours is measured using Chardet's initial capability
> (e.g. supported encodings). Challenge them if you want.
## ✨ Installation
Using pip:
```sh
pip install charset-normalizer -U
```
## 🚀 Basic Usage
### CLI
This package comes with a CLI.
```
usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
file [file ...]
The Real First Universal Charset Detector. Discover originating encoding used
on text file. Normalize text to unicode.
positional arguments:
files File(s) to be analysed
optional arguments:
-h, --help show this help message and exit
-v, --verbose Display complementary information about file if any.
Stdout will contain logs about the detection process.
-a, --with-alternative
Output complementary possibilities if any. Top-level
JSON WILL be a list.
-n, --normalize Permit to normalize input file. If not set, program
does not write anything.
-m, --minimal Only output the charset detected to STDOUT. Disabling
JSON output.
-r, --replace Replace file when trying to normalize it instead of
creating a new one.
-f, --force Replace file without asking if you are sure, use this
flag with caution.
-t THRESHOLD, --threshold THRESHOLD
Define a custom maximum amount of chaos allowed in
decoded content. 0. <= chaos <= 1.
--version Show version information and exit.
```
```bash
normalizer ./data/sample.1.fr.srt
```
or
```bash
python -m charset_normalizer ./data/sample.1.fr.srt
```
🎉 Since version 1.4.0 the CLI produces an easily usable stdout result in JSON format.
```json
{
"path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
"encoding": "cp1252",
"encoding_aliases": [
"1252",
"windows_1252"
],
"alternative_encodings": [
"cp1254",
"cp1256",
"cp1258",
"iso8859_14",
"iso8859_15",
"iso8859_16",
"iso8859_3",
"iso8859_9",
"latin_1",
"mbcs"
],
"language": "French",
"alphabets": [
"Basic Latin",
"Latin-1 Supplement"
],
"has_sig_or_bom": false,
"chaos": 0.149,
"coherence": 97.152,
"unicode_path": null,
"is_preferred": true
}
```
### Python
*Just print out normalized text*
```python
from charset_normalizer import from_path
results = from_path('./my_subtitle.srt')
print(str(results.best()))
```
*Upgrade your code without effort*
```python
from charset_normalizer import detect
```
The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.
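For instance (a hedged sketch; the exact values returned depend on the payload):
```python
from charset_normalizer import detect

result = detect("Déjà vu, comme c'est étrange !".encode("cp1252"))
# Same shape as chardet's output: 'encoding', 'language' and 'confidence'.
print(result)
```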
See the docs for advanced usage : [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)
## 😇 Why
When I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a
reliable alternative using a completely different method. Also! I never back down on a good challenge!
I **don't care** about the **originating charset** encoding, because **two different tables** can
produce **two identical rendered strings.**
What I want is to get readable text, the best I can.
In a way, **I'm brute forcing text decoding.** How cool is that? 😎
Don't confuse the **ftfy** package with charset-normalizer or chardet. ftfy's goal is to repair Unicode strings, whereas charset-normalizer converts a raw file in an unknown encoding to Unicode.
## 🍰 How
- Discard all charset encoding tables that could not fit the binary content.
- Measure the noise, or the mess, once opened (by chunks) with a corresponding charset encoding.
- Extract the matches with the lowest mess detected.
- Additionally, we measure coherence / probe for a language; a simplified sketch of the whole pipeline follows.
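To make these steps concrete, here is a deliberately naive, illustrative sketch of that pipeline; it is not the library's actual code, and `measure_mess` is a hypothetical stand-in for the real mess detector:
```python
from typing import Optional

CANDIDATES = ["utf_8", "cp1252", "latin_1", "cp1251"]  # a tiny subset, for illustration

def measure_mess(text: str) -> float:
    # Hypothetical noise metric: the ratio of unprintable characters.
    bad = sum(1 for ch in text if not ch.isprintable() and ch not in "\n\r\t")
    return bad / max(len(text), 1)

def naive_detect(payload: bytes) -> Optional[str]:
    scored = []
    for codec in CANDIDATES:
        try:
            text = payload.decode(codec)  # step 1: discard tables that cannot fit
        except (UnicodeDecodeError, LookupError):
            continue
        scored.append((measure_mess(text), codec))  # step 2: measure the mess
    return min(scored)[1] if scored else None  # step 3: keep the lowest mess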
**Wait a minute**, what is noise/mess and coherence according to **YOU ?**
*Noise :* I opened hundreds of text files, **written by humans**, with the wrong encoding table. **I observed**, then
**I established** some ground rules about **what is obvious** when **it seems like** a mess (aka. defining noise in rendered text).
I know that my interpretation of what constitutes noise is probably incomplete; feel free to contribute in order to
improve or rewrite it.
*Coherence :* For each language on Earth, we have computed ranked letter-appearance occurrences (the best we can). So I thought
that intel is worth something here. So I use those records against decoded text to check if I can detect intelligent design.
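Both measures are exposed on a match; a hedged sketch (payload invented for the example):
```python
from charset_normalizer import from_bytes

best = from_bytes("Доброе утро, мир!".encode("cp1251")).best()
if best is not None:
    # chaos: measured noise, lower is better; coherence: language-frequency
    # fit, higher is better.
    print(best.encoding, best.chaos, best.coherence, best.language)
```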
## ⚡ Known limitations
- Language detection is unreliable when the text contains two or more languages sharing identical letters. (e.g. HTML (English tags) + Turkish content (sharing Latin characters))
- Every charset detector heavily depends on sufficient content. In common cases, do not bother running detection on very tiny content.
## ⚠️ About Python EOLs
**If you are running:**
- Python >=2.7,<3.5: Unsupported
- Python 3.5: charset-normalizer < 2.1
- Python 3.6: charset-normalizer < 3.1
- Python 3.7: charset-normalizer < 4.0
Upgrade your Python interpreter as soon as possible.
## 👤 Contributing
Contributions, issues and feature requests are very much welcome.<br />
Feel free to check [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.
## 📝 License
Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).<br />
This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.
Characters frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/)
## 💼 For Enterprise
Professional support for charset-normalizer is available as part of the [Tidelift
Subscription][1]. Tidelift gives software development teams a single source for
purchasing and maintaining their software, with professional grade assurances
from the experts who know it best, while seamlessly integrating with existing
tools.
[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme
[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/7297/badge)](https://www.bestpractices.dev/projects/7297)
# Changelog
All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
## [3.4.4](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.4) (2025-10-13)
### Changed
- Bound `setuptools` to a specific constraint `setuptools>=68,<=81`.
- Raised upper bound of mypyc for the optional pre-built extension to v1.18.2
### Removed
- `setuptools-scm` as a build dependency.
### Misc
- Enforced hashes in `dev-requirements.txt` and created `ci-requirements.txt` for security purposes.
- Additional pre-built wheels for riscv64, s390x, and armv7l architectures.
- Restore `multiple.intoto.jsonl` in GitHub releases in addition to the individual attestation file per wheel.
## [3.4.3](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.3) (2025-08-09)
### Changed
- mypy(c) is no longer a required dependency at build time if `CHARSET_NORMALIZER_USE_MYPYC` isn't set to `1`. (#595) (#583)
- Automatically lower the confidence on small byte samples that are not Unicode in the legacy `detect` function output. (#391)
### Added
- Custom build backend to overcome inability to mark mypy as an optional dependency in the build phase.
- Support for Python 3.14
### Fixed
- sdist archive contained useless directories.
- Automatically fall back on valid UTF-16 or UTF-32 even if the mess detector says it's noisy. (#633)
### Misc
- SBOMs are automatically published to the relevant GitHub release to comply with regulatory changes.
Each published wheel comes with its SBOM. We chose CycloneDX as the format.
- Prebuilt optimized wheels are no longer distributed by default for CPython 3.7 due to a change in cibuildwheel.
## [3.4.2](https://github.com/Ousret/charset_normalizer/compare/3.4.1...3.4.2) (2025-05-02)
### Fixed
- Addressed the DeprecationWarning in our CLI regarding `argparse.FileType` by backporting the target class into the package. (#591)
- Improved the overall reliability of the detector with CJK Ideographs. (#605) (#587)
### Changed
- Optional mypyc compilation upgraded to version 1.15 for Python >= 3.8
## [3.4.1](https://github.com/Ousret/charset_normalizer/compare/3.4.0...3.4.1) (2024-12-24)
### Changed
- Project metadata is now stored in `pyproject.toml` instead of `setup.cfg`, using setuptools as the build backend.
- Enforce delayed annotation loading for simpler and more consistent types in the project.
- Optional mypyc compilation upgraded to version 1.14 for Python >= 3.8
### Added
- pre-commit configuration.
- noxfile.
### Removed
- `build-requirements.txt` as per using `pyproject.toml` native build configuration.
- `bin/integration.py` and `bin/serve.py` in favor of downstream integration test (see noxfile).
- `setup.cfg` in favor of `pyproject.toml` metadata configuration.
- Unused `utils.range_scan` function.
### Fixed
- Converting content to Unicode bytes may insert `utf_8` instead of preferred `utf-8`. (#572)
- Deprecation warning "'count' is passed as positional argument" when converting to Unicode bytes on Python 3.13+
## [3.4.0](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) (2024-10-08)
### Added
- Argument `--no-preemptive` in the CLI to prevent the detector from searching for hints.
- Support for Python 3.13 (#512)
### Fixed
- Relax the TypeError exception thrown when trying to compare a CharsetMatch with anything other than a CharsetMatch.
- Improved the general reliability of the detector based on user feedback. (#520) (#509) (#498) (#407) (#537)
- Declared charset in content (preemptive detection) not changed when converting to utf-8 bytes. (#381)
## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31)
### Fixed
- Unintentional memory usage regression when using large payloads that match several encodings (#376)
- Regression on some detection case showcased in the documentation (#371)
### Added
- Noise (md) probe that identifies malformed Arabic representation due to the presence of letters in isolated form (credit to my wife)
## [3.3.1](https://github.com/Ousret/charset_normalizer/compare/3.3.0...3.3.1) (2023-10-22)
### Changed
- Optional mypyc compilation upgraded to version 1.6.1 for Python >= 3.8
- Improved the general detection reliability based on reports from the community
## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30)
### Added
- Allow to execute the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer`
- Support for 9 forgotten encodings that are supported by Python but unlisted in `encoding.aliases` as they have no alias (#323)
### Removed
- (internal) Redundant utils.is_ascii function and unused function is_private_use_only
- (internal) charset_normalizer.assets is moved inside charset_normalizer.constant
### Changed
- (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection
- Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.8
### Fixed
- Unable to properly sort CharsetMatch when both chaos/noise and coherence were close due to an unreachable condition in \_\_lt\_\_ (#350)
## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)
### Changed
- The type hint for function `from_path` no longer enforces `PathLike` as its first argument
- Minor improvement over the global detection reliability
### Added
- Introduce function `is_binary` that relies on the main capabilities and is optimized to detect binaries; a usage sketch follows this list
- Propagate the `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp`, allowing deeper control over the detection (default True)
- Explicit support for Python 3.12
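A usage sketch for the new function (return values annotated as expectations, not guarantees):
```python
from charset_normalizer import is_binary

# Accepts a path, a file handle, or raw bytes.
print(is_binary(b"\x00\x5f\x2b\xff\x00\x00"))   # likely True: null bytes suggest binary
print(is_binary("Hello, world!".encode()))      # likely False: plain text
```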
### Fixed
- Edge case detection failure where a file would contain a 'very-long' camel-cased word (Issue #289)
## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)
### Added
- Argument `should_rename_legacy` for the legacy function `detect`, which now also disregards any new arguments without errors (PR #262)
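A hedged sketch of the new argument; the exact renaming shown in the comment is an assumption for illustration:
```python
from charset_normalizer import detect

payload = "你好，世界".encode("gb2312")
# With should_rename_legacy=True, legacy encoding names may be reported
# under a more modern equivalent (assumed here: GB2312 -> GB18030).
print(detect(payload, should_rename_legacy=True))
```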
### Removed
- Support for Python 3.6 (PR #260)
### Changed
- Optional speedup provided by mypy/c 1.0.1
## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)
### Fixed
- The multi-byte cutter/chunk generator did not always cut correctly (PR #233)
### Changed
- Speedup provided by mypy/c 0.990 on Python >= 3.7
## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)
### Added
- Extend the capability of explain=True when cp_isolation contains at most two entries (min. one); the details of the mess-detector results will be logged
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
- `normalizer --version` now specifies whether the current version provides the extra speedup (i.e. a mypyc-compiled wheel)
### Changed
- Build with static metadata using 'build' frontend
- Make the language detection stricter
- Optional: Module `md.py` can be compiled with mypyc to provide an extra speedup, up to 4x faster than v2.1
### Fixed
- CLI with option `--normalize` failing when using a full path for files
- TooManyAccentuatedPlugin inducing false positives in the mess detection when too few alphabetic characters have been fed to it
- Sphinx warnings when generating the documentation
### Removed
- Coherence detector no longer returns 'Simple English'; it returns 'English' instead
- Coherence detector no longer returns 'Classical Chinese'; it returns 'Chinese' instead
- Breaking: Methods `first()` and `best()` from CharsetMatch
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (it is unreliable and conflicts with ASCII)
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
- Breaking: Top-level function `normalize`
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
- Support for the backport `unicodedata2`
## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)
### Added
- Extend the capability of explain=True when cp_isolation contains at most two entries (min. one); the details of the mess-detector results will be logged
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
### Changed
- Build with static metadata using 'build' frontend
- Make the language detection stricter
### Fixed
- CLI with option `--normalize` failing when using a full path for files
- TooManyAccentuatedPlugin inducing false positives in the mess detection when too few alphabetic characters have been fed to it
### Removed
- Coherence detector no longer returns 'Simple English'; it returns 'English' instead
- Coherence detector no longer returns 'Classical Chinese'; it returns 'Chinese' instead
## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)
### Added
- `normalizer --version` now specifies whether the current version provides the extra speedup (i.e. a mypyc-compiled wheel)
### Removed
- Breaking: Methods `first()` and `best()` from CharsetMatch
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (it is unreliable and conflicts with ASCII)
### Fixed
- Sphinx warnings when generating the documentation
## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)
### Changed
- Optional: Module `md.py` can be compiled with mypyc to provide an extra speedup, up to 4x faster than v2.1
### Removed
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
- Breaking: Top-level function `normalize`
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
- Support for the backport `unicodedata2`
## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)
### Deprecated
- Function `normalize` scheduled for removal in 3.0
### Changed
- Removed a useless call to `decode` in the function `is_unprintable` (#206)
### Fixed
- Third-party library (i18n xgettext) crashing because utf_8 (PEP 263) written with an underscore was not recognized, from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)
## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)
### Added
- Output the Unicode table version when running the CLI with `--version` (PR #194)
### Changed
- Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175)
- Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183)
### Fixed
- Workaround for a potential bug in CPython: a Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1, was not acknowledged as a space (PR #175)
- CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)
### Removed
- Support for Python 3.5 (PR #192)
### Deprecated
- Use of backport unicodedata from `unicodedata2` as Python is quickly catching up, scheduled for removal in 3.0 (PR #194)
## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)
### Fixed
- ASCII misdetection in rare cases (PR #170)
## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)
### Added
- Explicit support for Python 3.11 (PR #164)
### Changed
- The logging behavior has been completely reviewed; it now uses only TRACE and DEBUG levels (PR #163 #165)
## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)
### Fixed
- Fallback match entries might lead to UnicodeDecodeError for large byte sequences (PR #154)
### Changed
- Skipping the language-detection (CD) on ASCII (PR #155)
## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)
### Changed
- Moderating the logging impact (since 2.0.8) for specific environments (PR #147)
### Fixed
- Wrong logging level applied when setting kwarg `explain` to True (PR #146)
## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)
### Changed
- Improvement over Vietnamese detection (PR #126)
- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)
- Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122)
- call sum() without an intermediary list following PEP 289 recommendations from [@adbar](https://github.com/adbar) (PR #129)
- Code style as refactored by Sourcery-AI (PR #131)
- Minor adjustment on the MD around european words (PR #133)
- Remove and replace SRTs from assets / tests (PR #139)
- Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135)
- Setting kwarg `explain` to True will provisionally add (bound to the function's lifespan) a specific stream handler (PR #135)
### Fixed
- Fix large (misleading) sequence giving UnicodeDecodeError (PR #137)
- Avoid using insignificantly small chunks (PR #137)
### Added
- Add and expose function `set_logging_handler` to configure a specific StreamHandler from [@nmaynes](https://github.com/nmaynes) (PR #135)
- Add `CHANGELOG.md` entries, format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)
## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)
### Added
- Add support for Kazakh (Cyrillic) language detection (PR #109)
### Changed
- Further improved inferring the language from a given single-byte code page (PR #112)
- Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116)
- Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113)
- Various detection improvement (MD+CD) (PR #117)
### Removed
- Remove redundant logging entry about detected language(s) (PR #115)
### Fixed
- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)
## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)
### Fixed
- Unforeseen regression with the loss of backward compatibility with some older minor releases of Python 3.5.x (PR #100)
- Fix CLI crash when using --minimal output in certain cases (PR #103)
### Changed
- Minor improvement to the detection efficiency (less than 1%) (PR #106 #101)
## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)
### Changed
- The project now complies with flake8, mypy, isort and black to ensure a better overall quality (PR #81)
- The BC support with v1.x was improved; the old staticmethods are restored (PR #82)
- The Unicode detection is slightly improved (PR #93)
- Add syntactic sugar \_\_bool\_\_ for the CharsetMatches results list-container (PR #91)
### Removed
- The project no longer raises a warning on tiny content given for detection; it is simply logged as a warning instead (PR #92)
### Fixed
- In some rare cases, the chunk extractor could cut in the middle of a multi-byte character and mislead the mess detection (PR #95)
- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)
- The MANIFEST.in was not exhaustive (PR #78)
## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)
### Fixed
- The CLI no longer raises an unexpected exception when no encoding has been found (PR #70)
- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)
- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)
- Submatch factoring could be wrong in rare edge cases (PR #72)
- Multiple files given to the CLI were ignored when publishing results to STDOUT. (After the first path) (PR #72)
- Fix line endings from CRLF to LF for certain project files (PR #67)
### Changed
- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)
- Allow fallback on specified encoding if any (PR #71)
## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)
### Changed
- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63)
- According to the community wishes, the detection will fall back on ASCII or UTF-8 in a last-resort case. (PR #64)
## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)
### Fixed
- Empty or too-small JSON payload misdetection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59)
### Changed
- Don't inject unicodedata2 into sys.modules from [@akx](https://github.com/akx) (PR #57)
## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)
### Fixed
- Make it work where there isn't a filesystem available, dropping assets frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)
- Using explain=False permanently disabled the verbose output in the current runtime (PR #47)
- One log entry (language target preemptive) was not shown in logs when using explain=True (PR #47)
- Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52)
### Changed
- The default argument values of the public function normalize were not aligned with from_bytes (PR #53)
### Added
- You may now use charset aliases in cp_isolation and cp_exclusion arguments (PR #47)
## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)
### Changed
- 4 to 5 times faster than the previous 1.4.0 release. At least 2x faster than Chardet.
- Emphasis has been placed on UTF-8 detection; it should perform nearly instantaneously.
- The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible.
- The detection mechanism has been slightly improved; Turkish content is now detected correctly (most of the time).
- The program has been rewritten to ease readability and maintainability (now using static typing).
- utf_7 detection has been reinstated.
### Removed
- This package no longer requires anything when used with Python 3.5 (dropped cached_property)
- Removed support for these languages: Catalan, Esperanto, Kazakh, Basque, Volapük, Azeri, Galician, Nynorsk, Macedonian, and Serbo-Croatian.
- The exception hook on UnicodeDecodeError has been removed.
### Deprecated
- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0
### Fixed
- The CLI output used the relative path of the file(s); it should be absolute.
## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)
### Fixed
- Logger configuration/usage no longer conflict with others (PR #44)
## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)
### Removed
- Dropped the package loguru in favor of standard logging.
- Dropped the nose test framework in favor of the maintained pytest.
- Chose not to use the dragonmapper package to help with gibberish Chinese/CJK text.
- cached_property is now required only for Python 3.5 due to a constraint; dropped for every other interpreter version.
- Stopped supporting UTF-7 payloads that do not contain a SIG.
- Dropped PrettyTable, replaced with pure JSON output in the CLI.
### Fixed
- The BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present, due to the sub-match factoring process.
- The BOM was not searched for properly when trying the utf32/16 parent codec.
### Changed
- Improved the package's final size by compressing frequencies.json.
- Huge improvement on the largest payloads.
### Added
- The CLI now produces JSON-consumable output.
- Return ASCII if the given sequence fits, with reasonable confidence.
## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)
### Fixed
- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40)
## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)
### Fixed
- An empty payload given for detection could cause an exception when accessing the `alphabets` property. (PR #39)
## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)
### Fixed
- The legacy detect function should return UTF-8-SIG if sig is present in the payload. (PR #38)
## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)
### Changed
- Amend the previous release to allow prettytable 2.0 (PR #35)
## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)
### Fixed
- Fix error while using the package with a python pre-release interpreter (PR #33)
### Changed
- Dependencies refactoring, constraints revised.
### Added
- Add Python 3.9 and 3.10 to the supported interpreters
MIT License
Copyright (c) 2025 TAHRI Ahmed R.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
View File
@ -0,0 +1,763 @@
Metadata-Version: 2.1
Name: charset-normalizer
Version: 3.4.4
Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.
Author-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
Maintainer-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
License: MIT
Project-URL: Changelog, https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md
Project-URL: Documentation, https://charset-normalizer.readthedocs.io/
Project-URL: Code, https://github.com/jawah/charset_normalizer
Project-URL: Issue tracker, https://github.com/jawah/charset_normalizer/issues
Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Text Processing :: Linguistic
Classifier: Topic :: Utilities
Classifier: Typing :: Typed
Requires-Python: >=3.7
Description-Content-Type: text/markdown
License-File: LICENSE
Provides-Extra: unicode_backport
<h1 align="center">Charset Detection, for Everyone 👋</h1>
<p align="center">
<sup>The Real First Universal Charset Detector</sup><br>
<a href="https://pypi.org/project/charset-normalizer">
<img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
</a>
<a href="https://pepy.tech/project/charset-normalizer/">
<img alt="Download Count Total" src="https://static.pepy.tech/badge/charset-normalizer/month" />
</a>
<a href="https://bestpractices.coreinfrastructure.org/projects/7297">
<img src="https://bestpractices.coreinfrastructure.org/projects/7297/badge">
</a>
</p>
<p align="center">
<sup><i>Featured Packages</i></sup><br>
<a href="https://github.com/jawah/niquests">
<img alt="Static Badge" src="https://img.shields.io/badge/Niquests-Most_Advanced_HTTP_Client-cyan">
</a>
<a href="https://github.com/jawah/wassima">
<img alt="Static Badge" src="https://img.shields.io/badge/Wassima-Certifi_Replacement-cyan">
</a>
</p>
<p align="center">
<sup><i>In other language (unofficial port - by the community)</i></sup><br>
<a href="https://github.com/nickspring/charset-normalizer-rs">
<img alt="Static Badge" src="https://img.shields.io/badge/Rust-red">
</a>
</p>
> A library that helps you read text from an unknown charset encoding.<br /> Motivated by `chardet`,
> I'm trying to resolve the issue by taking a new approach.
> All IANA character set names for which the Python core library provides codecs are supported.
<p align="center">
>>>>> <a href="https://charsetnormalizerweb.ousret.now.sh" target="_blank">👉 Try Me Online Now, Then Adopt Me 👈 </a> <<<<<
</p>
This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.
| Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |
|--------------------------------------------------|:---------------------------------------------:|:--------------------------------------------------------------------------------------------------:|:-----------------------------------------------:|
| `Fast` | ❌ | ✅ | ✅ |
| `Universal**` | ❌ | ✅ | ❌ |
| `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ |
| `Reliable` **with** distinguishable standards | ✅ | ✅ | ✅ |
| `License` | LGPL-2.1<br>_restrictive_ | MIT | MPL-1.1<br>_restrictive_ |
| `Native Python` | ✅ | ✅ | ❌ |
| `Detect spoken language` | ❌ | ✅ | N/A |
| `UnicodeDecodeError Safety` | ❌ | ✅ | ❌ |
| `Whl Size (min)` | 193.6 kB | 42 kB | ~200 kB |
| `Supported Encoding` | 33 | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |
<p align="center">
<img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>
</p>
*\*\*: They clearly use specific code for a specific encoding, even if it covers most of the encodings in use.*<br>
## ⚡ Performance
This package offers better performance than its counterpart Chardet. Here are some numbers.
| Package | Accuracy | Mean per file (ms) | File per sec (est) |
|-----------------------------------------------|:--------:|:------------------:|:------------------:|
| [chardet](https://github.com/chardet/chardet) | 86 % | 63 ms | 16 file/sec |
| charset-normalizer | **98 %** | **10 ms** | 100 file/sec |
| Package | 99th percentile | 95th percentile | 50th percentile |
|-----------------------------------------------|:---------------:|:---------------:|:---------------:|
| [chardet](https://github.com/chardet/chardet) | 265 ms | 71 ms | 7 ms |
| charset-normalizer | 100 ms | 50 ms | 5 ms |
_updated as of December 2024 using CPython 3.12_
Chardet's performance on larger files (1MB+) is very poor. Expect a huge difference on large payloads.
> Stats are generated using 400+ files using default parameters. More details on used files, see GHA workflows.
> And yes, these results might change at any time. The dataset can be updated to include more files.
> The actual delays heavily depend on your CPU capabilities. The factors should remain the same.
> Keep in mind that the stats are generous and that Chardet's accuracy vs ours is measured using Chardet's initial capability
> (e.g. supported encodings). Challenge them if you want.
## ✨ Installation
Using pip:
```sh
pip install charset-normalizer -U
```
## 🚀 Basic Usage
### CLI
This package comes with a CLI.
```
usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
file [file ...]
The Real First Universal Charset Detector. Discover originating encoding used
on text file. Normalize text to unicode.
positional arguments:
files File(s) to be analysed
optional arguments:
-h, --help show this help message and exit
-v, --verbose Display complementary information about file if any.
Stdout will contain logs about the detection process.
-a, --with-alternative
Output complementary possibilities if any. Top-level
JSON WILL be a list.
-n, --normalize Permit to normalize input file. If not set, program
does not write anything.
-m, --minimal Only output the charset detected to STDOUT. Disabling
JSON output.
-r, --replace Replace file when trying to normalize it instead of
creating a new one.
-f, --force Replace file without asking if you are sure, use this
flag with caution.
-t THRESHOLD, --threshold THRESHOLD
Define a custom maximum amount of chaos allowed in
decoded content. 0. <= chaos <= 1.
--version Show version information and exit.
```
```bash
normalizer ./data/sample.1.fr.srt
```
or
```bash
python -m charset_normalizer ./data/sample.1.fr.srt
```
🎉 Since version 1.4.0 the CLI produces an easily usable stdout result in JSON format.
```json
{
"path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
"encoding": "cp1252",
"encoding_aliases": [
"1252",
"windows_1252"
],
"alternative_encodings": [
"cp1254",
"cp1256",
"cp1258",
"iso8859_14",
"iso8859_15",
"iso8859_16",
"iso8859_3",
"iso8859_9",
"latin_1",
"mbcs"
],
"language": "French",
"alphabets": [
"Basic Latin",
"Latin-1 Supplement"
],
"has_sig_or_bom": false,
"chaos": 0.149,
"coherence": 97.152,
"unicode_path": null,
"is_preferred": true
}
```
### Python
*Just print out normalized text*
```python
from charset_normalizer import from_path
results = from_path('./my_subtitle.srt')
print(str(results.best()))
```
*Upgrade your code without effort*
```python
from charset_normalizer import detect
```
The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.
See the docs for advanced usage : [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)
## 😇 Why
When I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a
reliable alternative using a completely different method. Also! I never back down on a good challenge!
I **don't care** about the **originating charset** encoding, because **two different tables** can
produce **two identical rendered strings.**
What I want is to get readable text, the best I can.
In a way, **I'm brute forcing text decoding.** How cool is that? 😎
Don't confuse the **ftfy** package with charset-normalizer or chardet. ftfy's goal is to repair Unicode strings, whereas charset-normalizer converts a raw file in an unknown encoding to Unicode.
## 🍰 How
- Discard all charset encoding tables that could not fit the binary content.
- Measure the noise, or the mess, once opened (by chunks) with a corresponding charset encoding.
- Extract the matches with the lowest mess detected.
- Additionally, we measure coherence / probe for a language.
**Wait a minute**, what is noise/mess and coherence according to **YOU ?**
*Noise :* I opened hundreds of text files, **written by humans**, with the wrong encoding table. **I observed**, then
**I established** some ground rules about **what is obvious** when **it seems like** a mess (aka. defining noise in rendered text).
I know that my interpretation of what constitutes noise is probably incomplete; feel free to contribute in order to
improve or rewrite it.
*Coherence :* For each language on Earth, we have computed ranked letter-appearance occurrences (the best we can). So I thought
that intel is worth something here. So I use those records against decoded text to check if I can detect intelligent design.
## ⚡ Known limitations
- Language detection is unreliable when the text contains two or more languages sharing identical letters. (e.g. HTML (English tags) + Turkish content (sharing Latin characters))
- Every charset detector heavily depends on sufficient content. In common cases, do not bother running detection on very tiny content.
## ⚠️ About Python EOLs
**If you are running:**
- Python >=2.7,<3.5: Unsupported
- Python 3.5: charset-normalizer < 2.1
- Python 3.6: charset-normalizer < 3.1
- Python 3.7: charset-normalizer < 4.0
Upgrade your Python interpreter as soon as possible.
## 👤 Contributing
Contributions, issues and feature requests are very much welcome.<br />
Feel free to check [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.
## 📝 License
Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).<br />
This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.
Characters frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/)
## 💼 For Enterprise
Professional support for charset-normalizer is available as part of the [Tidelift
Subscription][1]. Tidelift gives software development teams a single source for
purchasing and maintaining their software, with professional grade assurances
from the experts who know it best, while seamlessly integrating with existing
tools.
[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme
[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/7297/badge)](https://www.bestpractices.dev/projects/7297)
# Changelog
All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
## [3.4.4](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.4) (2025-10-13)
### Changed
- Bound `setuptools` to a specific constraint `setuptools>=68,<=81`.
- Raised upper bound of mypyc for the optional pre-built extension to v1.18.2
### Removed
- `setuptools-scm` as a build dependency.
### Misc
- Enforced hashes in `dev-requirements.txt` and created `ci-requirements.txt` for security purposes.
- Additional pre-built wheels for riscv64, s390x, and armv7l architectures.
- Restored `multiple.intoto.jsonl` in GitHub releases in addition to the individual attestation file per wheel.
## [3.4.3](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.3) (2025-08-09)
### Changed
- mypy(c) is no longer a required dependency at build time if `CHARSET_NORMALIZER_USE_MYPYC` isn't set to `1`. (#595) (#583)
- Automatically lower the confidence on small byte samples that are not Unicode in the output of the legacy `detect` function. (#391)
### Added
- Custom build backend to overcome inability to mark mypy as an optional dependency in the build phase.
- Support for Python 3.14
### Fixed
- sdist archive contained useless directories.
- Automatically fall back on valid UTF-16 or UTF-32 even if the mess detector (md) says it's noisy. (#633)
### Misc
- SBOMs are automatically published to the relevant GitHub release to comply with regulatory changes.
Each published wheel comes with its SBOM; we chose CycloneDX as the format.
- Prebuilt optimized wheels are no longer distributed by default for CPython 3.7 due to a change in cibuildwheel.
## [3.4.2](https://github.com/Ousret/charset_normalizer/compare/3.4.1...3.4.2) (2025-05-02)
### Fixed
- Addressed the DeprecationWarning in our CLI regarding `argparse.FileType` by backporting the target class into the package. (#591)
- Improved the overall reliability of the detector with CJK Ideographs. (#605) (#587)
### Changed
- Optional mypyc compilation upgraded to version 1.15 for Python >= 3.8
## [3.4.1](https://github.com/Ousret/charset_normalizer/compare/3.4.0...3.4.1) (2024-12-24)
### Changed
- Project metadata are now stored in `pyproject.toml` instead of `setup.cfg`, using setuptools as the build backend.
- Enforce delayed annotation loading for simpler and more consistent types in the project.
- Optional mypyc compilation upgraded to version 1.14 for Python >= 3.8
### Added
- pre-commit configuration.
- noxfile.
### Removed
- `build-requirements.txt`, now that the `pyproject.toml` native build configuration is used.
- `bin/integration.py` and `bin/serve.py` in favor of downstream integration test (see noxfile).
- `setup.cfg` in favor of `pyproject.toml` metadata configuration.
- Unused `utils.range_scan` function.
### Fixed
- Converting content to Unicode bytes may insert `utf_8` instead of preferred `utf-8`. (#572)
- Deprecation warning "'count' is passed as positional argument" when converting to Unicode bytes on Python 3.13+
## [3.4.0](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) (2024-10-08)
### Added
- Argument `--no-preemptive` in the CLI to prevent the detector from searching for hints.
- Support for Python 3.13 (#512)
### Fixed
- Relax the TypeError exception thrown when trying to compare a CharsetMatch with anything other than a CharsetMatch.
- Improved the general reliability of the detector based on user feedback. (#520) (#509) (#498) (#407) (#537)
- Declared charset in content (preemptive detection) not changed when converting to utf-8 bytes. (#381)
## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31)
### Fixed
- Unintentional memory usage regression when using a large payload that matches several encodings (#376)
- Regression on some detection cases showcased in the documentation (#371)
### Added
- Noise (md) probe that identifies malformed Arabic representations due to the presence of letters in isolated form (credit to my wife)
## [3.3.1](https://github.com/Ousret/charset_normalizer/compare/3.3.0...3.3.1) (2023-10-22)
### Changed
- Optional mypyc compilation upgraded to version 1.6.1 for Python >= 3.8
- Improved the general detection reliability based on reports from the community
## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30)
### Added
- Allow executing the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer`
- Support for 9 forgotten encodings that are supported by Python but unlisted in `encodings.aliases` as they have no alias (#323)
### Removed
- (internal) Redundant utils.is_ascii function and unused function is_private_use_only
- (internal) charset_normalizer.assets is moved inside charset_normalizer.constant
### Changed
- (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection
- Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.8
### Fixed
- Unable to properly sort CharsetMatch when both chaos/noise and coherence were close due to an unreachable condition in \_\_lt\_\_ (#350)
## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)
### Changed
- Typehint for function `from_path` no longer enforces `PathLike` as its first argument
- Minor improvement over the global detection reliability
### Added
- Introduce function `is_binary` that relies on the main capabilities and is optimized to detect binaries
- Propagate the `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp`, allowing deeper control over the detection (default True)
- Explicit support for Python 3.12
### Fixed
- Edge case detection failure where a file would contain a 'very-long' camel-cased word (Issue #289)
## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)
### Added
- Argument `should_rename_legacy` for the legacy function `detect`; any new arguments are now disregarded without errors (PR #262)
### Removed
- Support for Python 3.6 (PR #260)
### Changed
- Optional speedup provided by mypy/c 1.0.1
## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)
### Fixed
- Multi-byte cutter/chunk generator did not always cut correctly (PR #233)
### Changed
- Speedup provided by mypy/c 0.990 on Python >= 3.7
## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)
### Added
- Extend the capability of explain=True when cp_isolation contains at most two entries (min one); it will log the details of the mess-detector results
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
- `normalizer --version` now specifies if the current version provides an extra speedup (meaning a mypyc-compiled wheel)
### Changed
- Build with static metadata using 'build' frontend
- Make the language detection stricter
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup, up to 4x faster than v2.1
### Fixed
- CLI with opt --normalize failed when using a full path for files
- TooManyAccentuatedPlugin induced false positives on the mess detection when too few alpha characters had been fed to it
- Sphinx warnings when generating the documentation
### Removed
- Coherence detector no longer returns 'Simple English'; it returns 'English' instead
- Coherence detector no longer returns 'Classical Chinese'; it returns 'Chinese' instead
- Breaking: Methods `first()` and `best()` from CharsetMatch
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (it is unreliable/conflicts with ASCII)
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
- Breaking: Top-level function `normalize`
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
- Support for the backport `unicodedata2`
## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)
### Added
- Extend the capability of explain=True when cp_isolation contains at most two entries (min one); it will log the details of the mess-detector results
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
### Changed
- Build with static metadata using 'build' frontend
- Make the language detection stricter
### Fixed
- CLI with opt --normalize failed when using a full path for files
- TooManyAccentuatedPlugin induced false positives on the mess detection when too few alpha characters had been fed to it
### Removed
- Coherence detector no longer returns 'Simple English'; it returns 'English' instead
- Coherence detector no longer returns 'Classical Chinese'; it returns 'Chinese' instead
## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)
### Added
- `normalizer --version` now specifies if the current version provides an extra speedup (meaning a mypyc-compiled wheel)
### Removed
- Breaking: Methods `first()` and `best()` from CharsetMatch
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (it is unreliable/conflicts with ASCII)
### Fixed
- Sphinx warnings when generating the documentation
## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)
### Changed
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup, up to 4x faster than v2.1
### Removed
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
- Breaking: Top-level function `normalize`
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
- Support for the backport `unicodedata2`
## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)
### Deprecated
- Function `normalize` scheduled for removal in 3.0
### Changed
- Removed useless call to decode in fn is_unprintable (#206)
### Fixed
- Third-party library (i18n xgettext) crashing by not recognizing utf_8 (PEP 263) with an underscore, from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)
## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)
### Added
- Output the Unicode table version when running the CLI with `--version` (PR #194)
### Changed
- Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175)
- Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183)
### Fixed
- Work around a potential bug in CPython where Zero Width No-Break Space, located in Arabic Presentation Forms-B (Unicode 1.1), is not acknowledged as a space (PR #175)
- CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)
### Removed
- Support for Python 3.5 (PR #192)
### Deprecated
- Use of backport unicodedata from `unicodedata2` as Python is quickly catching up, scheduled for removal in 3.0 (PR #194)
## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)
### Fixed
- ASCII mis-detection in rare cases (PR #170)
## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)
### Added
- Explicit support for Python 3.11 (PR #164)
### Changed
- The logging behavior has been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165)
## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)
### Fixed
- Fallback match entries might lead to UnicodeDecodeError for large byte sequences (PR #154)
### Changed
- Skipping the language-detection (CD) on ASCII (PR #155)
## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)
### Changed
- Moderating the logging impact (since 2.0.8) for specific environments (PR #147)
### Fixed
- Wrong logging level applied when setting kwarg `explain` to True (PR #146)
## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)
### Changed
- Improvement over Vietnamese detection (PR #126)
- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)
- Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122)
- call sum() without an intermediary list following PEP 289 recommendations from [@adbar](https://github.com/adbar) (PR #129)
- Code style as refactored by Sourcery-AI (PR #131)
- Minor adjustment on the MD around European words (PR #133)
- Remove and replace SRTs from assets / tests (PR #139)
- Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135)
- Setting kwarg `explain` to True will provisionally add (bound to the function's lifespan) a specific stream handler (PR #135)
### Fixed
- Fix large (misleading) sequence giving UnicodeDecodeError (PR #137)
- Avoid using chunks that are too insignificant (PR #137)
### Added
- Add and expose function `set_logging_handler` to configure a specific StreamHandler from [@nmaynes](https://github.com/nmaynes) (PR #135)
- Add `CHANGELOG.md` entries, format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)
## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)
### Added
- Add support for Kazakh (Cyrillic) language detection (PR #109)
### Changed
- Further improve inferring the language from a given single-byte code page (PR #112)
- Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116)
- Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113)
- Various detection improvement (MD+CD) (PR #117)
### Removed
- Remove redundant logging entry about detected language(s) (PR #115)
### Fixed
- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)
## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)
### Fixed
- Unforeseen regression with the loss of backward compatibility with some older minors of Python 3.5.x (PR #100)
- Fix CLI crash when using --minimal output in certain cases (PR #103)
### Changed
- Minor improvement to the detection efficiency (less than 1%) (PR #106 #101)
## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)
### Changed
- The project now complies with flake8, mypy, isort and black to ensure a better overall quality (PR #81)
- The BC support with v1.x was improved; the old staticmethods are restored (PR #82)
- The Unicode detection is slightly improved (PR #93)
- Add syntax sugar \_\_bool\_\_ for the results CharsetMatches list-container (PR #91)
### Removed
- The project no longer raises a warning on tiny content given for detection; it is simply logged as a warning instead (PR #92)
### Fixed
- In some rare cases, the chunk extractor could cut in the middle of a multi-byte character and mislead the mess detection (PR #95)
- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)
- The MANIFEST.in was not exhaustive (PR #78)
## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)
### Fixed
- The CLI no longer raises an unexpected exception when no encoding has been found (PR #70)
- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)
- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)
- Submatch factoring could be wrong in rare edge cases (PR #72)
- Multiple files given to the CLI were ignored when publishing results to STDOUT. (After the first path) (PR #72)
- Fix line endings from CRLF to LF for certain project files (PR #67)
### Changed
- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)
- Allow fallback on specified encoding if any (PR #71)
## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)
### Changed
- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63)
- According to the community wishes, the detection will fall back on ASCII or UTF-8 in a last-resort case. (PR #64)
## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)
### Fixed
- Empty/too-small JSON payload mis-detection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59)
### Changed
- Don't inject unicodedata2 into sys.modules from [@akx](https://github.com/akx) (PR #57)
## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)
### Fixed
- Make it work where there isn't a filesystem available, dropping assets frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)
- Using explain=False permanently disabled the verbose output in the current runtime (PR #47)
- One log entry (language target preemptive) was not shown in logs when using explain=True (PR #47)
- Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52)
### Changed
- The default argument values of the public function normalize were not aligned with from_bytes (PR #53)
### Added
- You may now use charset aliases in cp_isolation and cp_exclusion arguments (PR #47)
## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)
### Changed
- 4 to 5 times faster than the previous 1.4.0 release. At least 2x faster than Chardet.
- Emphasis has been put on UTF-8 detection; it should perform near-instantaneously.
- The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible.
- The detection mechanism has been slightly improved; Turkish content is now detected correctly (most of the time)
- The program has been rewritten to improve readability and maintainability (now using static typing).
- utf_7 detection has been reinstated.
### Removed
- This package no longer requires anything when used with Python 3.5 (dropped cached_property)
- Removed support for these languages: Catalan, Esperanto, Kazakh, Basque, Volapük, Azeri, Galician, Nynorsk, Macedonian, and Serbo-Croatian.
- The exception hook on UnicodeDecodeError has been removed.
### Deprecated
- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0
### Fixed
- The CLI output used the relative path of the file(s). It should be absolute.
## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)
### Fixed
- Logger configuration/usage no longer conflict with others (PR #44)
## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)
### Removed
- Using standard logging instead of the package loguru.
- Dropping the nose test framework in favor of the maintained pytest.
- Chose not to use the dragonmapper package to help with gibberish Chinese/CJK text.
- Require cached_property only for Python 3.5 due to constraint. Dropping for every other interpreter version.
- Stop support for UTF-7 that does not contain a SIG.
- Dropping PrettyTable, replaced with pure JSON output in CLI.
### Fixed
- The BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present, due to the sub-match factoring process.
- Not searching properly for the BOM when trying utf32/16 parent codec.
### Changed
- Improving the package final size by compressing frequencies.json.
- Huge improvement over the largest payloads.
### Added
- CLI now produces JSON consumable output.
- Return ASCII if the given sequences fit, given reasonable confidence.
## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)
### Fixed
- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40)
## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)
### Fixed
- Empty given payload for detection may cause an exception if trying to access the `alphabets` property. (PR #39)
## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)
### Fixed
- The legacy detect function should return UTF-8-SIG if sig is present in the payload. (PR #38)
## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)
### Changed
- Amend the previous release to allow prettytable 2.0 (PR #35)
## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)
### Fixed
- Fix error while using the package with a Python pre-release interpreter (PR #33)
### Changed
- Dependencies refactoring, constraints revised.
### Added
- Add Python 3.9 and 3.10 to the supported interpreters
MIT License
Copyright (c) 2025 TAHRI Ahmed R.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -0,0 +1,764 @@
Metadata-Version: 2.4
Name: charset-normalizer
Version: 3.4.4
Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.
Author-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
Maintainer-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
License: MIT
Project-URL: Changelog, https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md
Project-URL: Documentation, https://charset-normalizer.readthedocs.io/
Project-URL: Code, https://github.com/jawah/charset_normalizer
Project-URL: Issue tracker, https://github.com/jawah/charset_normalizer/issues
Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Text Processing :: Linguistic
Classifier: Topic :: Utilities
Classifier: Typing :: Typed
Requires-Python: >=3.7
Description-Content-Type: text/markdown
License-File: LICENSE
Provides-Extra: unicode-backport
Dynamic: license-file
<h1 align="center">Charset Detection, for Everyone 👋</h1>
<p align="center">
<sup>The Real First Universal Charset Detector</sup><br>
<a href="https://pypi.org/project/charset-normalizer">
<img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
</a>
<a href="https://pepy.tech/project/charset-normalizer/">
<img alt="Download Count Total" src="https://static.pepy.tech/badge/charset-normalizer/month" />
</a>
<a href="https://bestpractices.coreinfrastructure.org/projects/7297">
<img src="https://bestpractices.coreinfrastructure.org/projects/7297/badge">
</a>
</p>
<p align="center">
<sup><i>Featured Packages</i></sup><br>
<a href="https://github.com/jawah/niquests">
<img alt="Static Badge" src="https://img.shields.io/badge/Niquests-Most_Advanced_HTTP_Client-cyan">
</a>
<a href="https://github.com/jawah/wassima">
<img alt="Static Badge" src="https://img.shields.io/badge/Wassima-Certifi_Replacement-cyan">
</a>
</p>
<p align="center">
<sup><i>In other language (unofficial port - by the community)</i></sup><br>
<a href="https://github.com/nickspring/charset-normalizer-rs">
<img alt="Static Badge" src="https://img.shields.io/badge/Rust-red">
</a>
</p>
> A library that helps you read text from an unknown charset encoding.<br /> Motivated by `chardet`,
> I'm trying to resolve the issue by taking a new approach.
> All IANA character set names for which the Python core library provides codecs are supported.
<p align="center">
>>>>> <a href="https://charsetnormalizerweb.ousret.now.sh" target="_blank">👉 Try Me Online Now, Then Adopt Me 👈 </a> <<<<<
</p>
This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.
| Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |
|--------------------------------------------------|:---------------------------------------------:|:--------------------------------------------------------------------------------------------------:|:-----------------------------------------------:|
| `Fast` | ❌ | ✅ | ✅ |
| `Universal**` | ❌ | ✅ | ❌ |
| `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ |
| `Reliable` **with** distinguishable standards | ✅ | ✅ | ✅ |
| `License` | LGPL-2.1<br>_restrictive_ | MIT | MPL-1.1<br>_restrictive_ |
| `Native Python` | ✅ | ✅ | ❌ |
| `Detect spoken language` | ❌ | ✅ | N/A |
| `UnicodeDecodeError Safety` | ❌ | ✅ | ❌ |
| `Whl Size (min)` | 193.6 kB | 42 kB | ~200 kB |
| `Supported Encoding` | 33 | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |
<p align="center">
<img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>
</p>
*\*\* : They are clearly using specific code for a specific encoding even if covering most of used one*<br>
## ⚡ Performance
This package offer better performance than its counterpart Chardet. Here are some numbers.
| Package | Accuracy | Mean per file (ms) | File per sec (est) |
|-----------------------------------------------|:--------:|:------------------:|:------------------:|
| [chardet](https://github.com/chardet/chardet) | 86 % | 63 ms | 16 file/sec |
| charset-normalizer | **98 %** | **10 ms** | 100 file/sec |
| Package | 99th percentile | 95th percentile | 50th percentile |
|-----------------------------------------------|:---------------:|:---------------:|:---------------:|
| [chardet](https://github.com/chardet/chardet) | 265 ms | 71 ms | 7 ms |
| charset-normalizer | 100 ms | 50 ms | 5 ms |
_updated as of december 2024 using CPython 3.12_
Chardet's performance on larger file (1MB+) are very poor. Expect huge difference on large payload.
> Stats are generated using 400+ files using default parameters. More details on used files, see GHA workflows.
> And yes, these results might change at any time. The dataset can be updated to include more files.
> The actual delays heavily depends on your CPU capabilities. The factors should remain the same.
> Keep in mind that the stats are generous and that Chardet accuracy vs our is measured using Chardet initial capability
> (e.g. Supported Encoding) Challenge-them if you want.
## ✨ Installation
Using pip:
```sh
pip install charset-normalizer -U
```
## 🚀 Basic Usage
### CLI
This package comes with a CLI.
```
usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
file [file ...]
The Real First Universal Charset Detector. Discover originating encoding used
on text file. Normalize text to unicode.
positional arguments:
files File(s) to be analysed
optional arguments:
-h, --help show this help message and exit
-v, --verbose Display complementary information about file if any.
Stdout will contain logs about the detection process.
-a, --with-alternative
Output complementary possibilities if any. Top-level
JSON WILL be a list.
-n, --normalize Permit to normalize input file. If not set, program
does not write anything.
-m, --minimal Only output the charset detected to STDOUT. Disabling
JSON output.
-r, --replace Replace file when trying to normalize it instead of
creating a new one.
-f, --force Replace file without asking if you are sure, use this
flag with caution.
-t THRESHOLD, --threshold THRESHOLD
Define a custom maximum amount of chaos allowed in
decoded content. 0. <= chaos <= 1.
--version Show version information and exit.
```
```bash
normalizer ./data/sample.1.fr.srt
```
or
```bash
python -m charset_normalizer ./data/sample.1.fr.srt
```
🎉 Since version 1.4.0 the CLI produce easily usable stdout result in JSON format.
```json
{
"path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
"encoding": "cp1252",
"encoding_aliases": [
"1252",
"windows_1252"
],
"alternative_encodings": [
"cp1254",
"cp1256",
"cp1258",
"iso8859_14",
"iso8859_15",
"iso8859_16",
"iso8859_3",
"iso8859_9",
"latin_1",
"mbcs"
],
"language": "French",
"alphabets": [
"Basic Latin",
"Latin-1 Supplement"
],
"has_sig_or_bom": false,
"chaos": 0.149,
"coherence": 97.152,
"unicode_path": null,
"is_preferred": true
}
```
### Python
*Just print out normalized text*
```python
from charset_normalizer import from_path
results = from_path('./my_subtitle.srt')
print(str(results.best()))
```
*Upgrade your code without effort*
```python
from charset_normalizer import detect
```
The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.
See the docs for advanced usage : [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)
## 😇 Why
When I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a
reliable alternative using a completely different method. Also! I never back down on a good challenge!
I **don't care** about the **originating charset** encoding, because **two different tables** can
produce **two identical rendered string.**
What I want is to get readable text, the best I can.
In a way, **I'm brute forcing text decoding.** How cool is that ? 😎
Don't confuse package **ftfy** with charset-normalizer or chardet. ftfy goal is to repair Unicode string whereas charset-normalizer to convert raw file in unknown encoding to unicode.
## 🍰 How
- Discard all charset encoding table that could not fit the binary content.
- Measure noise, or the mess once opened (by chunks) with a corresponding charset encoding.
- Extract matches with the lowest mess detected.
- Additionally, we measure coherence / probe for a language.
**Wait a minute**, what is noise/mess and coherence according to **YOU ?**
*Noise :* I opened hundred of text files, **written by humans**, with the wrong encoding table. **I observed**, then
**I established** some ground rules about **what is obvious** when **it seems like** a mess (aka. defining noise in rendered text).
I know that my interpretation of what is noise is probably incomplete, feel free to contribute in order to
improve or rewrite it.
*Coherence :* For each language there is on earth, we have computed ranked letter appearance occurrences (the best we can). So I thought
that intel is worth something here. So I use those records against decoded text to check if I can detect intelligent design.
## ⚡ Known limitations
- Language detection is unreliable when text contains two or more languages sharing identical letters. (eg. HTML (english tags) + Turkish content (Sharing Latin characters))
- Every charset detector heavily depends on sufficient content. In common cases, do not bother run detection on very tiny content.
## ⚠️ About Python EOLs
**If you are running:**
- Python >=2.7,<3.5: Unsupported
- Python 3.5: charset-normalizer < 2.1
- Python 3.6: charset-normalizer < 3.1
- Python 3.7: charset-normalizer < 4.0
Upgrade your Python interpreter as soon as possible.
## 👤 Contributing
Contributions, issues and feature requests are very much welcome.<br />
Feel free to check [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.
## 📝 License
Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).<br />
This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.
Characters frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/)
## 💼 For Enterprise
Professional support for charset-normalizer is available as part of the [Tidelift
Subscription][1]. Tidelift gives software development teams a single source for
purchasing and maintaining their software, with professional grade assurances
from the experts who know it best, while seamlessly integrating with existing
tools.
[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme
[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/7297/badge)](https://www.bestpractices.dev/projects/7297)
# Changelog
All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
## [3.4.4](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.4) (2025-10-13)
### Changed
- Bound `setuptools` to a specific constraint `setuptools>=68,<=81`.
- Raised upper bound of mypyc for the optional pre-built extension to v1.18.2
### Removed
- `setuptools-scm` as a build dependency.
### Misc
- Enforced hashes in `dev-requirements.txt` and created `ci-requirements.txt` for security purposes.
- Additional pre-built wheels for riscv64, s390x, and armv7l architectures.
- Restore ` multiple.intoto.jsonl` in GitHub releases in addition to individual attestation file per wheel.
## [3.4.3](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.3) (2025-08-09)
### Changed
- mypy(c) is no longer a required dependency at build time if `CHARSET_NORMALIZER_USE_MYPYC` isn't set to `1`. (#595) (#583)
- automatically lower confidence on small bytes samples that are not Unicode in `detect` output legacy function. (#391)
### Added
- Custom build backend to overcome inability to mark mypy as an optional dependency in the build phase.
- Support for Python 3.14
### Fixed
- sdist archive contained useless directories.
- automatically fallback on valid UTF-16 or UTF-32 even if the md says it's noisy. (#633)
### Misc
- SBOM are automatically published to the relevant GitHub release to comply with regulatory changes.
Each published wheel comes with its SBOM. We choose CycloneDX as the format.
- Prebuilt optimized wheel are no longer distributed by default for CPython 3.7 due to a change in cibuildwheel.
## [3.4.2](https://github.com/Ousret/charset_normalizer/compare/3.4.1...3.4.2) (2025-05-02)
### Fixed
- Addressed the DeprecationWarning in our CLI regarding `argparse.FileType` by backporting the target class into the package. (#591)
- Improved the overall reliability of the detector with CJK Ideographs. (#605) (#587)
### Changed
- Optional mypyc compilation upgraded to version 1.15 for Python >= 3.8
## [3.4.1](https://github.com/Ousret/charset_normalizer/compare/3.4.0...3.4.1) (2024-12-24)
### Changed
- Project metadata are now stored using `pyproject.toml` instead of `setup.cfg` using setuptools as the build backend.
- Enforce annotation delayed loading for a simpler and consistent types in the project.
- Optional mypyc compilation upgraded to version 1.14 for Python >= 3.8
### Added
- pre-commit configuration.
- noxfile.
### Removed
- `build-requirements.txt` as per using `pyproject.toml` native build configuration.
- `bin/integration.py` and `bin/serve.py` in favor of downstream integration test (see noxfile).
- `setup.cfg` in favor of `pyproject.toml` metadata configuration.
- Unused `utils.range_scan` function.
### Fixed
- Converting content to Unicode bytes may insert `utf_8` instead of preferred `utf-8`. (#572)
- Deprecation warning "'count' is passed as positional argument" when converting to Unicode bytes on Python 3.13+
## [3.4.0](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) (2024-10-08)
### Added
- Argument `--no-preemptive` in the CLI to prevent the detector to search for hints.
- Support for Python 3.13 (#512)
### Fixed
- Relax the TypeError exception thrown when trying to compare a CharsetMatch with anything else than a CharsetMatch.
- Improved the general reliability of the detector based on user feedbacks. (#520) (#509) (#498) (#407) (#537)
- Declared charset in content (preemptive detection) not changed when converting to utf-8 bytes. (#381)
## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31)
### Fixed
- Unintentional memory usage regression when using large payload that match several encoding (#376)
- Regression on some detection case showcased in the documentation (#371)
### Added
- Noise (md) probe that identify malformed arabic representation due to the presence of letters in isolated form (credit to my wife)
## [3.3.1](https://github.com/Ousret/charset_normalizer/compare/3.3.0...3.3.1) (2023-10-22)
### Changed
- Optional mypyc compilation upgraded to version 1.6.1 for Python >= 3.8
- Improved the general detection reliability based on reports from the community
## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30)
### Added
- Allow to execute the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer`
- Support for 9 forgotten encoding that are supported by Python but unlisted in `encoding.aliases` as they have no alias (#323)
### Removed
- (internal) Redundant utils.is_ascii function and unused function is_private_use_only
- (internal) charset_normalizer.assets is moved inside charset_normalizer.constant
### Changed
- (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection
- Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.8
### Fixed
- Unable to properly sort CharsetMatch when both chaos/noise and coherence were close due to an unreachable condition in \_\_lt\_\_ (#350)
## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)
### Changed
- Typehint for function `from_path` no longer enforce `PathLike` as its first argument
- Minor improvement over the global detection reliability
### Added
- Introduce function `is_binary` that relies on main capabilities, and optimized to detect binaries
- Propagate `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp` that allow a deeper control over the detection (default True)
- Explicit support for Python 3.12
### Fixed
- Edge case detection failure where a file would contain 'very-long' camel cased word (Issue #289)
## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)
### Added
- Argument `should_rename_legacy` for legacy function `detect` and disregard any new arguments without errors (PR #262)
### Removed
- Support for Python 3.6 (PR #260)
### Changed
- Optional speedup provided by mypy/c 1.0.1
## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)
### Fixed
- Multi-bytes cutter/chunk generator did not always cut correctly (PR #233)
### Changed
- Speedup provided by mypy/c 0.990 on Python >= 3.7
## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)
### Added
- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)
### Changed
- Build with static metadata using 'build' frontend
- Make the language detection stricter
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
### Fixed
- CLI with opt --normalize fail when using full path for files
- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it
- Sphinx warnings when generating the documentation
### Removed
- Coherence detector no longer return 'Simple English' instead return 'English'
- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'
- Breaking: Method `first()` and `best()` from CharsetMatch
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII)
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
- Breaking: Top-level function `normalize`
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
- Support for the backport `unicodedata2`
## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)
### Added
- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
### Changed
- Build with static metadata using 'build' frontend
- Make the language detection stricter
### Fixed
- CLI with opt --normalize fail when using full path for files
- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it
### Removed
- Coherence detector no longer return 'Simple English' instead return 'English'
- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'
## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)
### Added
- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)
### Removed
- Breaking: Method `first()` and `best()` from CharsetMatch
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII)
### Fixed
- Sphinx warnings when generating the documentation
## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)
### Changed
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
### Removed
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
- Breaking: Top-level function `normalize`
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
- Support for the backport `unicodedata2`
## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)
### Deprecated
- Function `normalize` scheduled for removal in 3.0
### Changed
- Removed useless call to decode in fn is_unprintable (#206)
### Fixed
- Third-party library (i18n xgettext) crashing not recognizing utf_8 (PEP 263) with underscore from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)
## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)
### Added
- Output the Unicode table version when running the CLI with `--version` (PR #194)
### Changed
- Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175)
- Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183)
### Fixed
- Workaround potential bug in cpython with Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space (PR #175)
- CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)
### Removed
- Support for Python 3.5 (PR #192)
### Deprecated
- Use of backport unicodedata from `unicodedata2` as Python is quickly catching up, scheduled for removal in 3.0 (PR #194)
## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)
### Fixed
- ASCII miss-detection on rare cases (PR #170)
## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)
### Added
- Explicit support for Python 3.11 (PR #164)
### Changed
- The logging behavior have been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165)
## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)
### Fixed
- Fallback match entries might lead to UnicodeDecodeError for large bytes sequence (PR #154)
### Changed
- Skipping the language-detection (CD) on ASCII (PR #155)
## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)
### Changed
- Moderating the logging impact (since 2.0.8) for specific environments (PR #147)
### Fixed
- Wrong logging level applied when setting kwarg `explain` to True (PR #146)
## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)
### Changed
- Improvement over Vietnamese detection (PR #126)
- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)
- Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122)
- call sum() without an intermediary list following PEP 289 recommendations from [@adbar](https://github.com/adbar) (PR #129)
- Code style as refactored by Sourcery-AI (PR #131)
- Minor adjustment on the MD around european words (PR #133)
- Remove and replace SRTs from assets / tests (PR #139)
- Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135)
- Setting kwarg `explain` to True will add provisionally (bounded to function lifespan) a specific stream handler (PR #135)
### Fixed
- Fix large (misleading) sequence giving UnicodeDecodeError (PR #137)
- Avoid using too insignificant chunk (PR #137)
### Added
- Add and expose function `set_logging_handler` to configure a specific StreamHandler from [@nmaynes](https://github.com/nmaynes) (PR #135)
- Add `CHANGELOG.md` entries, format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)
## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)
### Added
- Add support for Kazakh (Cyrillic) language detection (PR #109)
### Changed
- Further, improve inferring the language from a given single-byte code page (PR #112)
- Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116)
- Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113)
- Various detection improvement (MD+CD) (PR #117)
### Removed
- Remove redundant logging entry about detected language(s) (PR #115)
### Fixed
- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)
## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)
### Fixed
- Unforeseen regression with the loss of the backward-compatibility with some older minor of Python 3.5.x (PR #100)
- Fix CLI crash when using --minimal output in certain cases (PR #103)
### Changed
- Minor improvement to the detection efficiency (less than 1%) (PR #106 #101)
## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)
### Changed
- The project now comply with: flake8, mypy, isort and black to ensure a better overall quality (PR #81)
- The BC-support with v1.x was improved, the old staticmethods are restored (PR #82)
- The Unicode detection is slightly improved (PR #93)
- Add syntax sugar \_\_bool\_\_ for results CharsetMatches list-container (PR #91)
### Removed
- The project no longer raise warning on tiny content given for detection, will be simply logged as warning instead (PR #92)
### Fixed
- In some rare case, the chunks extractor could cut in the middle of a multi-byte character and could mislead the mess detection (PR #95)
- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)
- The MANIFEST.in was not exhaustive (PR #78)
## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)
### Fixed
- The CLI no longer raise an unexpected exception when no encoding has been found (PR #70)
- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)
- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)
- Submatch factoring could be wrong in rare edge cases (PR #72)
- Multiple files given to the CLI were ignored when publishing results to STDOUT. (After the first path) (PR #72)
- Fix line endings from CRLF to LF for certain project files (PR #67)
### Changed
- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)
- Allow fallback on specified encoding if any (PR #71)
## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)
### Changed
- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63)
- According to the community wishes, the detection will fall back on ASCII or UTF-8 in a last-resort case. (PR #64)
## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)
### Fixed
- Empty/Too small JSON payload miss-detection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59)
### Changed
- Don't inject unicodedata2 into sys.modules from [@akx](https://github.com/akx) (PR #57)
## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)
### Fixed
- Make it work where there isn't a filesystem available, dropping assets frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)
- Using explain=False permanently disable the verbose output in the current runtime (PR #47)
- One log entry (language target preemptive) was not show in logs when using explain=True (PR #47)
- Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52)
### Changed
- Public function normalize default args values were not aligned with from_bytes (PR #53)
### Added
- You may now use charset aliases in cp_isolation and cp_exclusion arguments (PR #47)
## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)
### Changed
- 4x to 5 times faster than the previous 1.4.0 release. At least 2x faster than Chardet.
- Accent has been made on UTF-8 detection, should perform rather instantaneous.
- The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible.
- The detection mechanism has been slightly improved, now Turkish content is detected correctly (most of the time)
- The program has been rewritten to ease the readability and maintainability. (+Using static typing)+
- utf_7 detection has been reinstated.
### Removed
- This package no longer require anything when used with Python 3.5 (Dropped cached_property)
- Removed support for these languages: Catalan, Esperanto, Kazakh, Baque, Volapük, Azeri, Galician, Nynorsk, Macedonian, and Serbocroatian.
- The exception hook on UnicodeDecodeError has been removed.
### Deprecated
- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0
### Fixed
- The CLI output used the relative path of the file(s). Should be absolute.
## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)
### Fixed
- Logger configuration/usage no longer conflict with others (PR #44)
## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)
### Removed
- Using standard logging instead of using the package loguru.
- Dropping nose test framework in favor of the maintained pytest.
- Choose to not use dragonmapper package to help with gibberish Chinese/CJK text.
- Require cached_property only for Python 3.5 due to constraint. Dropping for every other interpreter version.
- Stop support for UTF-7 that does not contain a SIG.
- Dropping PrettyTable, replaced with pure JSON output in CLI.
### Fixed
- BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present. Due to the sub-match factoring process.
- Not searching properly for the BOM when trying utf32/16 parent codec.
### Changed
- Improving the package final size by compressing frequencies.json.
- Huge improvement over the larges payload.
### Added
- CLI now produces JSON consumable output.
- Return ASCII if given sequences fit. Given reasonable confidence.
## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)
### Fixed
- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40)
## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)
### Fixed
- Empty given payload for detection may cause an exception if trying to access the `alphabets` property. (PR #39)
## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)
### Fixed
- The legacy detect function should return UTF-8-SIG if sig is present in the payload. (PR #38)
## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)
### Changed
- Amend the previous release to allow prettytable 2.0 (PR #35)
## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)
### Fixed
- Fix error while using the package with a python pre-release interpreter (PR #33)
### Changed
- Dependencies refactoring, constraints revised.
### Added
- Add python 3.9 and 3.10 to the supported interpreters
MIT License
Copyright (c) 2025 TAHRI Ahmed R.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -0,0 +1,764 @@
Metadata-Version: 2.4
Name: charset-normalizer
Version: 3.4.4
Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.
Author-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
Maintainer-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
License: MIT
Project-URL: Changelog, https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md
Project-URL: Documentation, https://charset-normalizer.readthedocs.io/
Project-URL: Code, https://github.com/jawah/charset_normalizer
Project-URL: Issue tracker, https://github.com/jawah/charset_normalizer/issues
Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Text Processing :: Linguistic
Classifier: Topic :: Utilities
Classifier: Typing :: Typed
Requires-Python: >=3.7
Description-Content-Type: text/markdown
License-File: LICENSE
Provides-Extra: unicode-backport
Dynamic: license-file
<h1 align="center">Charset Detection, for Everyone 👋</h1>
<p align="center">
<sup>The Real First Universal Charset Detector</sup><br>
<a href="https://pypi.org/project/charset-normalizer">
<img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
</a>
<a href="https://pepy.tech/project/charset-normalizer/">
<img alt="Download Count Total" src="https://static.pepy.tech/badge/charset-normalizer/month" />
</a>
<a href="https://bestpractices.coreinfrastructure.org/projects/7297">
<img src="https://bestpractices.coreinfrastructure.org/projects/7297/badge">
</a>
</p>
<p align="center">
<sup><i>Featured Packages</i></sup><br>
<a href="https://github.com/jawah/niquests">
<img alt="Static Badge" src="https://img.shields.io/badge/Niquests-Most_Advanced_HTTP_Client-cyan">
</a>
<a href="https://github.com/jawah/wassima">
<img alt="Static Badge" src="https://img.shields.io/badge/Wassima-Certifi_Replacement-cyan">
</a>
</p>
<p align="center">
<sup><i>In other languages (unofficial ports by the community)</i></sup><br>
<a href="https://github.com/nickspring/charset-normalizer-rs">
<img alt="Static Badge" src="https://img.shields.io/badge/Rust-red">
</a>
</p>
> A library that helps you read text from an unknown charset encoding.<br /> Motivated by `chardet`,
> I'm trying to resolve the issue by taking a new approach.
> All IANA character set names for which the Python core library provides codecs are supported.
<p align="center">
>>>>> <a href="https://charsetnormalizerweb.ousret.now.sh" target="_blank">👉 Try Me Online Now, Then Adopt Me 👈 </a> <<<<<
</p>
This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.
| Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |
|--------------------------------------------------|:---------------------------------------------:|:--------------------------------------------------------------------------------------------------:|:-----------------------------------------------:|
| `Fast` | ❌ | ✅ | ✅ |
| `Universal**` | ❌ | ✅ | ❌ |
| `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ |
| `Reliable` **with** distinguishable standards | ✅ | ✅ | ✅ |
| `License` | LGPL-2.1<br>_restrictive_ | MIT | MPL-1.1<br>_restrictive_ |
| `Native Python` | ✅ | ✅ | ❌ |
| `Detect spoken language` | ❌ | ✅ | N/A |
| `UnicodeDecodeError Safety` | ❌ | ✅ | ❌ |
| `Whl Size (min)` | 193.6 kB | 42 kB | ~200 kB |
| `Supported Encoding` | 33 | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |
<p align="center">
<img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>
</p>
*\*\* : they clearly rely on encoding-specific code, even if it covers most of the encodings in use*<br>
## ⚡ Performance
This package offers better performance than its counterpart Chardet. Here are some numbers.
| Package | Accuracy | Mean per file (ms) | File per sec (est) |
|-----------------------------------------------|:--------:|:------------------:|:------------------:|
| [chardet](https://github.com/chardet/chardet) | 86 % | 63 ms | 16 file/sec |
| charset-normalizer | **98 %** | **10 ms** | 100 file/sec |
| Package | 99th percentile | 95th percentile | 50th percentile |
|-----------------------------------------------|:---------------:|:---------------:|:---------------:|
| [chardet](https://github.com/chardet/chardet) | 265 ms | 71 ms | 7 ms |
| charset-normalizer | 100 ms | 50 ms | 5 ms |
_updated as of December 2024 using CPython 3.12_
Chardet's performance on larger files (1 MB+) is very poor. Expect a huge difference on large payloads.
> Stats are generated using 400+ files with default parameters. For more details on the files used, see the GHA workflows.
> And yes, these results might change at any time. The dataset can be updated to include more files.
> The actual delays heavily depend on your CPU capabilities. The factors should remain the same.
> Keep in mind that the stats are generous and that Chardet's accuracy vs ours is measured using Chardet's initial capability
> (e.g. supported encodings). Challenge them if you want.
## ✨ Installation
Using pip:
```sh
pip install charset-normalizer -U
```
## 🚀 Basic Usage
### CLI
This package comes with a CLI.
```
usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
file [file ...]
The Real First Universal Charset Detector. Discover originating encoding used
on text file. Normalize text to unicode.
positional arguments:
files File(s) to be analysed
optional arguments:
-h, --help show this help message and exit
-v, --verbose Display complementary information about file if any.
Stdout will contain logs about the detection process.
-a, --with-alternative
Output complementary possibilities if any. Top-level
JSON WILL be a list.
-n, --normalize Permit to normalize input file. If not set, program
does not write anything.
-m, --minimal Only output the charset detected to STDOUT. Disabling
JSON output.
-r, --replace Replace file when trying to normalize it instead of
creating a new one.
-f, --force Replace file without asking if you are sure, use this
flag with caution.
-t THRESHOLD, --threshold THRESHOLD
Define a custom maximum amount of chaos allowed in
decoded content. 0. <= chaos <= 1.
--version Show version information and exit.
```
```bash
normalizer ./data/sample.1.fr.srt
```
or
```bash
python -m charset_normalizer ./data/sample.1.fr.srt
```
🎉 Since version 1.4.0 the CLI produces an easily usable stdout result in JSON format.
```json
{
"path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
"encoding": "cp1252",
"encoding_aliases": [
"1252",
"windows_1252"
],
"alternative_encodings": [
"cp1254",
"cp1256",
"cp1258",
"iso8859_14",
"iso8859_15",
"iso8859_16",
"iso8859_3",
"iso8859_9",
"latin_1",
"mbcs"
],
"language": "French",
"alphabets": [
"Basic Latin",
"Latin-1 Supplement"
],
"has_sig_or_bom": false,
"chaos": 0.149,
"coherence": 97.152,
"unicode_path": null,
"is_preferred": true
}
```
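Because the report is plain JSON on stdout, it is easy to consume from another program. A minimal sketch, assuming `normalizer` is on your `PATH` and reusing the sample file above:

```python
import json
import subprocess

# Run the CLI and parse its JSON report. The top level is a single object
# by default, and a list when -a/--with-alternative is passed.
raw = subprocess.run(
    ["normalizer", "./data/sample.1.fr.srt"],
    capture_output=True,
    text=True,
    check=True,
).stdout

report = json.loads(raw)
print(report["encoding"], report["language"])
```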
### Python
*Just print out normalized text*
```python
from charset_normalizer import from_path
results = from_path('./my_subtitle.srt')
print(str(results.best()))
```
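The returned best match exposes the same details as the CLI report. A short sketch of the most useful properties (see the API docs for the full surface):

```python
from charset_normalizer import from_path

best_guess = from_path('./my_subtitle.srt').best()  # None when nothing fits

if best_guess is not None:
    print(best_guess.encoding)   # e.g. "cp1252"
    print(best_guess.language)   # e.g. "French"
    print(best_guess.chaos)      # noise ratio, lower is better
    print(best_guess.coherence)  # language fit, higher is better
    text = str(best_guess)       # the decoded, normalized text
```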
*Upgrade your code without effort*
```python
from charset_normalizer import detect
```
The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.
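For instance, the legacy entry point keeps chardet's dict-shaped result; a sketch:

```python
from charset_normalizer import detect

# Same shape as chardet's result: 'encoding', 'language' and 'confidence' keys.
result = detect('Comment ça va ?'.encode('cp1252'))
print(result['encoding'], result['confidence'])
```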
See the docs for advanced usage: [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)
## 😇 Why
When I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a
reliable alternative using a completely different method. Also! I never back down from a good challenge!
I **don't care** about the **originating charset** encoding, because **two different tables** can
produce **two identical rendered strings.**
What I want is to get readable text, the best I can.
In a way, **I'm brute-forcing text decoding.** How cool is that? 😎
Don't confuse the **ftfy** package with charset-normalizer or chardet. ftfy's goal is to repair Unicode strings, whereas charset-normalizer's is to convert a raw file in an unknown encoding to Unicode.
## 🍰 How
- Discard all charset encoding tables that could not fit the binary content.
- Measure the noise, or the mess, once opened (by chunks) with a corresponding charset encoding.
- Extract the matches with the lowest mess detected.
- Additionally, we measure coherence / probe for a language.
**Wait a minute**, what is noise/mess and coherence according to **YOU ?**
*Noise:* I opened hundreds of text files, **written by humans**, with the wrong encoding table. **I observed**, then
**I established** some ground rules about **what is obvious** when **it seems like** a mess (aka. defining noise in rendered text).
I know that my interpretation of noise is probably incomplete; feel free to contribute in order to
improve or rewrite it.
*Coherence:* For each language on earth, we have computed ranked letter-appearance occurrences (the best we can). So I thought
that intel is worth something here. So I use those records against decoded text to check if I can detect intelligent design.
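To make the two measures concrete, here is a deliberately naive sketch of the idea; it is not the project's actual scoring code:

```python
import unicodedata

def toy_noise(payload: bytes, encoding: str) -> float:
    """Rough 'mess' ratio: the share of control-like characters once decoded."""
    try:
        decoded = payload.decode(encoding)
    except (UnicodeDecodeError, LookupError):
        return 1.0  # this table cannot fit the binary content at all
    if not decoded:
        return 0.0
    suspicious = sum(
        1 for ch in decoded
        if unicodedata.category(ch).startswith("C") and ch not in "\n\r\t"
    )
    return suspicious / len(decoded)

# Keep the candidates with the lowest noise; coherence (letter-frequency fit
# against per-language tables) would then rank the survivors.
payload = "Bonjour, où êtes-vous ?".encode("cp1252")
scores = {enc: toy_noise(payload, enc) for enc in ("utf_8", "cp1252", "latin_1")}
print(min(scores, key=scores.get), scores)
```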
## ⚡ Known limitations
- Language detection is unreliable when the text contains two or more languages sharing identical letters (e.g. HTML (English tags) + Turkish content, which share Latin characters).
- Every charset detector heavily depends on sufficient content. In common cases, do not bother running detection on very tiny content.
## ⚠️ About Python EOLs
**If you are running:**
- Python >=2.7,<3.5: Unsupported
- Python 3.5: charset-normalizer < 2.1
- Python 3.6: charset-normalizer < 3.1
- Python 3.7: charset-normalizer < 4.0
Upgrade your Python interpreter as soon as possible.
## 👤 Contributing
Contributions, issues and feature requests are very much welcome.<br />
Feel free to check [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.
## 📝 License
Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).<br />
This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.
Characters frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/)
## 💼 For Enterprise
Professional support for charset-normalizer is available as part of the [Tidelift
Subscription][1]. Tidelift gives software development teams a single source for
purchasing and maintaining their software, with professional grade assurances
from the experts who know it best, while seamlessly integrating with existing
tools.
[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme
[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/7297/badge)](https://www.bestpractices.dev/projects/7297)
# Changelog
All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
## [3.4.4](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.4) (2025-10-13)
### Changed
- Bound `setuptools` to a specific constraint `setuptools>=68,<=81`.
- Raised upper bound of mypyc for the optional pre-built extension to v1.18.2
### Removed
- `setuptools-scm` as a build dependency.
### Misc
- Enforced hashes in `dev-requirements.txt` and created `ci-requirements.txt` for security purposes.
- Additional pre-built wheels for riscv64, s390x, and armv7l architectures.
- Restore `multiple.intoto.jsonl` in GitHub releases in addition to the individual attestation file per wheel.
## [3.4.3](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.3) (2025-08-09)
### Changed
- mypy(c) is no longer a required dependency at build time if `CHARSET_NORMALIZER_USE_MYPYC` isn't set to `1`. (#595) (#583)
- automatically lower the confidence on small byte samples that are not Unicode in the legacy `detect` output. (#391)
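Opting into the mypyc-compiled build from source therefore means setting the variable at install time; a sketch (`--no-binary` simply forces a source build):

```sh
CHARSET_NORMALIZER_USE_MYPYC=1 pip install charset-normalizer --no-binary charset-normalizer
```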
### Added
- Custom build backend to overcome inability to mark mypy as an optional dependency in the build phase.
- Support for Python 3.14
### Fixed
- sdist archive contained useless directories.
- automatically fall back on valid UTF-16 or UTF-32 even if the md (mess detector) says it's noisy. (#633)
### Misc
- SBOMs are automatically published to the relevant GitHub release to comply with regulatory changes.
Each published wheel comes with its SBOM. We chose CycloneDX as the format.
- Prebuilt optimized wheels are no longer distributed by default for CPython 3.7 due to a change in cibuildwheel.
## [3.4.2](https://github.com/Ousret/charset_normalizer/compare/3.4.1...3.4.2) (2025-05-02)
### Fixed
- Addressed the DeprecationWarning in our CLI regarding `argparse.FileType` by backporting the target class into the package. (#591)
- Improved the overall reliability of the detector with CJK Ideographs. (#605) (#587)
### Changed
- Optional mypyc compilation upgraded to version 1.15 for Python >= 3.8
## [3.4.1](https://github.com/Ousret/charset_normalizer/compare/3.4.0...3.4.1) (2024-12-24)
### Changed
- Project metadata are now stored using `pyproject.toml` instead of `setup.cfg` using setuptools as the build backend.
- Enforce delayed annotation loading for simpler and more consistent types in the project.
- Optional mypyc compilation upgraded to version 1.14 for Python >= 3.8
### Added
- pre-commit configuration.
- noxfile.
### Removed
- `build-requirements.txt` as per using `pyproject.toml` native build configuration.
- `bin/integration.py` and `bin/serve.py` in favor of downstream integration test (see noxfile).
- `setup.cfg` in favor of `pyproject.toml` metadata configuration.
- Unused `utils.range_scan` function.
### Fixed
- Converting content to Unicode bytes may insert `utf_8` instead of preferred `utf-8`. (#572)
- Deprecation warning "'count' is passed as positional argument" when converting to Unicode bytes on Python 3.13+
## [3.4.0](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) (2024-10-08)
### Added
- Argument `--no-preemptive` in the CLI to prevent the detector from searching for hints (see the example below).
- Support for Python 3.13 (#512)
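Per the first entry above, a sketch of the flag in use:

```bash
normalizer --no-preemptive ./data/sample.1.fr.srt
```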
### Fixed
- Relax the TypeError exception thrown when trying to compare a CharsetMatch with anything other than a CharsetMatch.
- Improved the general reliability of the detector based on user feedback. (#520) (#509) (#498) (#407) (#537)
- Declared charset in content (preemptive detection) not changed when converting to utf-8 bytes. (#381)
## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31)
### Fixed
- Unintentional memory usage regression when using a large payload that matches several encodings (#376)
- Regression on some detection cases showcased in the documentation (#371)
### Added
- Noise (md) probe that identifies malformed Arabic representations due to the presence of letters in isolated form (credit to my wife)
## [3.3.1](https://github.com/Ousret/charset_normalizer/compare/3.3.0...3.3.1) (2023-10-22)
### Changed
- Optional mypyc compilation upgraded to version 1.6.1 for Python >= 3.8
- Improved the general detection reliability based on reports from the community
## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30)
### Added
- Allow executing the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer`
- Support for 9 forgotten encodings that are supported by Python but unlisted in `encodings.aliases` as they have no alias (#323)
### Removed
- (internal) Redundant utils.is_ascii function and unused function is_private_use_only
- (internal) charset_normalizer.assets is moved inside charset_normalizer.constant
### Changed
- (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection
- Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.8
### Fixed
- Unable to properly sort CharsetMatch when both chaos/noise and coherence were close due to an unreachable condition in \_\_lt\_\_ (#350)
## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)
### Changed
- Typehint for function `from_path` no longer enforces `PathLike` as its first argument
- Minor improvement over the global detection reliability
### Added
- Introduce function `is_binary`, which relies on the main detection capabilities and is optimized to detect binaries (see the sketch below)
- Propagate the `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp`, allowing deeper control over the detection (default True)
- Explicit support for Python 3.12
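A minimal sketch of the first two additions (file names are placeholders):

```python
from charset_normalizer import from_path, is_binary

# is_binary accepts a path, raw bytes or a file pointer.
print(is_binary("./archive.zip"))  # True for binary-looking content

# Disabling the last-resort fallback keeps only confident matches.
results = from_path("./my_subtitle.srt", enable_fallback=False)
print(results.best())
```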
### Fixed
- Edge case detection failure where a file would contain 'very-long' camel cased word (Issue #289)
## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)
### Added
- Argument `should_rename_legacy` for legacy function `detect` and disregard any new arguments without errors (PR #262)
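A sketch of the flag in use (the payload is an arbitrary example):

```python
from charset_normalizer import detect

# should_rename_legacy=True renames legacy encoding names in the result
# to their modern superset equivalents when one exists.
result = detect("例えば".encode("shift_jis"), should_rename_legacy=True)
print(result["encoding"])
```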
### Removed
- Support for Python 3.6 (PR #260)
### Changed
- Optional speedup provided by mypy/c 1.0.1
## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)
### Fixed
- Multi-byte cutter/chunk generator did not always cut correctly (PR #233)
### Changed
- Speedup provided by mypy/c 0.990 on Python >= 3.7
## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)
### Added
- Extend the capability of explain=True: when cp_isolation contains at most two entries (min. one), the Mess-detector results are logged in detail
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
- `normalizer --version` now specifies if the current version provides extra speedup (meaning a mypyc-compiled whl)
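For instance, requiring a stricter coherence ratio from candidate matches (a sketch):

```python
from charset_normalizer import from_bytes

# Matches whose detected-language coherence falls below 0.2 are not retained.
results = from_bytes(
    "Ceci est un exemple de texte.".encode("cp1252"),
    language_threshold=0.2,
)
print(results.best())
```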
### Changed
- Build with static metadata using 'build' frontend
- Make the language detection stricter
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
### Fixed
- The CLI with the --normalize option failed when using full paths for files
- TooManyAccentuatedPlugin induced false positives in the mess detection when too few alpha characters were fed to it
- Sphinx warnings when generating the documentation
### Removed
- The coherence detector no longer returns 'Simple English'; it returns 'English' instead
- The coherence detector no longer returns 'Classical Chinese'; it returns 'Chinese' instead
- Breaking: Method `first()` and `best()` from CharsetMatch
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII)
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
- Breaking: Top-level function `normalize`
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
- Support for the backport `unicodedata2`
## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)
### Added
- Extend the capability of explain=True: when cp_isolation contains at most two entries (min. one), the Mess-detector results are logged in detail
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
### Changed
- Build with static metadata using 'build' frontend
- Make the language detection stricter
### Fixed
- The CLI with the --normalize option failed when using full paths for files
- TooManyAccentuatedPlugin induced false positives in the mess detection when too few alpha characters were fed to it
### Removed
- The coherence detector no longer returns 'Simple English'; it returns 'English' instead
- The coherence detector no longer returns 'Classical Chinese'; it returns 'Chinese' instead
## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)
### Added
- `normalizer --version` now specifies if the current version provides extra speedup (meaning a mypyc-compiled whl)
### Removed
- Breaking: Method `first()` and `best()` from CharsetMatch
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII)
### Fixed
- Sphinx warnings when generating the documentation
## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)
### Changed
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
### Removed
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
- Breaking: Top-level function `normalize`
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
- Support for the backport `unicodedata2`
## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)
### Deprecated
- Function `normalize` scheduled for removal in 3.0
### Changed
- Removed useless call to decode in fn is_unprintable (#206)
### Fixed
- Third-party library (i18n xgettext) crashing not recognizing utf_8 (PEP 263) with underscore from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)
## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)
### Added
- Output the Unicode table version when running the CLI with `--version` (PR #194)
### Changed
- Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175)
- Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183)
### Fixed
- Workaround potential bug in cpython with Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space (PR #175)
- CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)
### Removed
- Support for Python 3.5 (PR #192)
### Deprecated
- Use of backport unicodedata from `unicodedata2` as Python is quickly catching up, scheduled for removal in 3.0 (PR #194)
## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)
### Fixed
- ASCII miss-detection on rare cases (PR #170)
## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)
### Added
- Explicit support for Python 3.11 (PR #164)
### Changed
- The logging behavior has been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165)
## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)
### Fixed
- Fallback match entries might lead to UnicodeDecodeError for large bytes sequence (PR #154)
### Changed
- Skipping the language-detection (CD) on ASCII (PR #155)
## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)
### Changed
- Moderating the logging impact (since 2.0.8) for specific environments (PR #147)
### Fixed
- Wrong logging level applied when setting kwarg `explain` to True (PR #146)
## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)
### Changed
- Improvement over Vietnamese detection (PR #126)
- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)
- Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122)
- call sum() without an intermediary list following PEP 289 recommendations from [@adbar](https://github.com/adbar) (PR #129)
- Code style as refactored by Sourcery-AI (PR #131)
- Minor adjustment on the MD around european words (PR #133)
- Remove and replace SRTs from assets / tests (PR #139)
- Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135)
- Setting kwarg `explain` to True will add provisionally (bounded to function lifespan) a specific stream handler (PR #135)
### Fixed
- Fix large (misleading) sequence giving UnicodeDecodeError (PR #137)
- Avoid using too insignificant chunk (PR #137)
### Added
- Add and expose function `set_logging_handler` to configure a specific StreamHandler from [@nmaynes](https://github.com/nmaynes) (PR #135)
- Add `CHANGELOG.md` entries, format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)
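Since the library logger now ships with a `NullHandler`, surfacing its records is a matter of attaching your own handler. A sketch using only the standard library, assuming the logger is named after the package (the exposed `set_logging_handler` helper is a shortcut for the same thing):

```python
import logging

from charset_normalizer import from_bytes

# The package logger is silent by default (NullHandler); attach a handler
# to see the DEBUG-level detection records.
logger = logging.getLogger("charset_normalizer")
logger.addHandler(logging.StreamHandler())
logger.setLevel(logging.DEBUG)

from_bytes("Hello, world!".encode("utf_8")).best()
```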
## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)
### Added
- Add support for Kazakh (Cyrillic) language detection (PR #109)
### Changed
- Further improve inferring the language from a given single-byte code page (PR #112)
- Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116)
- Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113)
- Various detection improvement (MD+CD) (PR #117)
### Removed
- Remove redundant logging entry about detected language(s) (PR #115)
### Fixed
- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)
## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)
### Fixed
- Unforeseen regression with the loss of backward compatibility with some older minor versions of Python 3.5.x (PR #100)
- Fix CLI crash when using --minimal output in certain cases (PR #103)
### Changed
- Minor improvement to the detection efficiency (less than 1%) (PR #106 #101)
## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)
### Changed
- The project now complies with flake8, mypy, isort and black to ensure a better overall quality (PR #81)
- The BC-support with v1.x was improved, the old staticmethods are restored (PR #82)
- The Unicode detection is slightly improved (PR #93)
- Add syntax sugar \_\_bool\_\_ for results CharsetMatches list-container (PR #91)
### Removed
- The project no longer raises a warning on tiny content given for detection; it is simply logged as a warning instead (PR #92)
### Fixed
- In some rare case, the chunks extractor could cut in the middle of a multi-byte character and could mislead the mess detection (PR #95)
- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)
- The MANIFEST.in was not exhaustive (PR #78)
## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)
### Fixed
- The CLI no longer raises an unexpected exception when no encoding has been found (PR #70)
- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)
- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)
- Submatch factoring could be wrong in rare edge cases (PR #72)
- Multiple files given to the CLI were ignored when publishing results to STDOUT. (After the first path) (PR #72)
- Fix line endings from CRLF to LF for certain project files (PR #67)
### Changed
- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)
- Allow fallback on specified encoding if any (PR #71)
## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)
### Changed
- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63)
- According to the community wishes, the detection will fall back on ASCII or UTF-8 in a last-resort case. (PR #64)
## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)
### Fixed
- Empty/Too small JSON payload miss-detection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59)
### Changed
- Don't inject unicodedata2 into sys.modules from [@akx](https://github.com/akx) (PR #57)
## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)
### Fixed
- Make it work where there isn't a filesystem available, dropping assets frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)
- Using explain=False permanently disabled the verbose output in the current runtime (PR #47)
- One log entry (language target preemptive) was not shown in logs when using explain=True (PR #47)
- Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52)
### Changed
- Public function normalize default args values were not aligned with from_bytes (PR #53)
### Added
- You may now use charset aliases in cp_isolation and cp_exclusion arguments (PR #47)
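A sketch of alias use in the isolation/exclusion arguments (the payload is illustrative):

```python
from charset_normalizer import from_bytes

payload = "Première ligne.".encode("cp1252")

# Aliases such as "latin1" are accepted alongside canonical codec names.
results = from_bytes(
    payload,
    cp_isolation=["latin1", "cp1252"],  # only try these codecs
    cp_exclusion=["utf_7"],             # never try this one
)
print(results.best())
```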
## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)
### Changed
- 4 to 5 times faster than the previous 1.4.0 release. At least 2x faster than Chardet.
- Emphasis has been put on UTF-8 detection; it should perform near-instantaneously.
- The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible.
- The detection mechanism has been slightly improved, now Turkish content is detected correctly (most of the time)
- The program has been rewritten to improve readability and maintainability (using static typing).
- utf_7 detection has been reinstated.
### Removed
- This package no longer requires anything when used with Python 3.5 (dropped cached_property)
- Removed support for these languages: Catalan, Esperanto, Kazakh, Basque, Volapük, Azeri, Galician, Nynorsk, Macedonian, and Serbo-Croatian.
- The exception hook on UnicodeDecodeError has been removed.
### Deprecated
- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0
### Fixed
- The CLI output used the relative path of the file(s). Should be absolute.
## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)
### Fixed
- Logger configuration/usage no longer conflict with others (PR #44)
## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)
### Removed
- Using standard logging instead of using the package loguru.
- Dropping nose test framework in favor of the maintained pytest.
- Choose to not use dragonmapper package to help with gibberish Chinese/CJK text.
- Require cached_property only for Python 3.5 due to constraint. Dropping for every other interpreter version.
- Stop support for UTF-7 that does not contain a SIG.
- Dropping PrettyTable, replaced with pure JSON output in CLI.
### Fixed
- BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present, due to the sub-match factoring process.
- The BOM was not searched for properly when trying the utf32/16 parent codecs.
### Changed
- Improved the final package size by compressing frequencies.json.
- Huge performance improvement on the largest payloads.
### Added
- The CLI now produces consumable JSON output.
- Return ASCII if the given sequence fits, with reasonable confidence.
## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)
### Fixed
- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40)
## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)
### Fixed
- Empty given payload for detection may cause an exception if trying to access the `alphabets` property. (PR #39)
## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)
### Fixed
- The legacy detect function should return UTF-8-SIG if sig is present in the payload. (PR #38)
## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)
### Changed
- Amend the previous release to allow prettytable 2.0 (PR #35)
## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)
### Fixed
- Fix error while using the package with a Python pre-release interpreter (PR #33)
### Changed
- Dependencies refactoring, constraints revised.
### Added
- Add Python 3.9 and 3.10 to the supported interpreters
MIT License
Copyright (c) 2025 TAHRI Ahmed R.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -0,0 +1,764 @@
Metadata-Version: 2.4
Name: charset-normalizer
Version: 3.4.4
Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.
Author-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
Maintainer-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
License: MIT
Project-URL: Changelog, https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md
Project-URL: Documentation, https://charset-normalizer.readthedocs.io/
Project-URL: Code, https://github.com/jawah/charset_normalizer
Project-URL: Issue tracker, https://github.com/jawah/charset_normalizer/issues
Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Text Processing :: Linguistic
Classifier: Topic :: Utilities
Classifier: Typing :: Typed
Requires-Python: >=3.7
Description-Content-Type: text/markdown
License-File: LICENSE
Provides-Extra: unicode-backport
Dynamic: license-file
<h1 align="center">Charset Detection, for Everyone 👋</h1>
<p align="center">
<sup>The Real First Universal Charset Detector</sup><br>
<a href="https://pypi.org/project/charset-normalizer">
<img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
</a>
<a href="https://pepy.tech/project/charset-normalizer/">
<img alt="Download Count Total" src="https://static.pepy.tech/badge/charset-normalizer/month" />
</a>
<a href="https://bestpractices.coreinfrastructure.org/projects/7297">
<img src="https://bestpractices.coreinfrastructure.org/projects/7297/badge">
</a>
</p>
<p align="center">
<sup><i>Featured Packages</i></sup><br>
<a href="https://github.com/jawah/niquests">
<img alt="Static Badge" src="https://img.shields.io/badge/Niquests-Most_Advanced_HTTP_Client-cyan">
</a>
<a href="https://github.com/jawah/wassima">
<img alt="Static Badge" src="https://img.shields.io/badge/Wassima-Certifi_Replacement-cyan">
</a>
</p>
<p align="center">
<sup><i>In other language (unofficial port - by the community)</i></sup><br>
<a href="https://github.com/nickspring/charset-normalizer-rs">
<img alt="Static Badge" src="https://img.shields.io/badge/Rust-red">
</a>
</p>
> A library that helps you read text from an unknown charset encoding.<br /> Motivated by `chardet`,
> I'm trying to resolve the issue by taking a new approach.
> All IANA character set names for which the Python core library provides codecs are supported.
<p align="center">
>>>>> <a href="https://charsetnormalizerweb.ousret.now.sh" target="_blank">👉 Try Me Online Now, Then Adopt Me 👈 </a> <<<<<
</p>
This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.
| Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |
|--------------------------------------------------|:---------------------------------------------:|:--------------------------------------------------------------------------------------------------:|:-----------------------------------------------:|
| `Fast` | ❌ | ✅ | ✅ |
| `Universal**` | ❌ | ✅ | ❌ |
| `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ |
| `Reliable` **with** distinguishable standards | ✅ | ✅ | ✅ |
| `License` | LGPL-2.1<br>_restrictive_ | MIT | MPL-1.1<br>_restrictive_ |
| `Native Python` | ✅ | ✅ | ❌ |
| `Detect spoken language` | ❌ | ✅ | N/A |
| `UnicodeDecodeError Safety` | ❌ | ✅ | ❌ |
| `Whl Size (min)` | 193.6 kB | 42 kB | ~200 kB |
| `Supported Encoding` | 33 | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |
<p align="center">
<img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>
</p>
*\*\* : They are clearly using specific code for a specific encoding even if covering most of used one*<br>
## ⚡ Performance
This package offer better performance than its counterpart Chardet. Here are some numbers.
| Package | Accuracy | Mean per file (ms) | File per sec (est) |
|-----------------------------------------------|:--------:|:------------------:|:------------------:|
| [chardet](https://github.com/chardet/chardet) | 86 % | 63 ms | 16 file/sec |
| charset-normalizer | **98 %** | **10 ms** | 100 file/sec |
| Package | 99th percentile | 95th percentile | 50th percentile |
|-----------------------------------------------|:---------------:|:---------------:|:---------------:|
| [chardet](https://github.com/chardet/chardet) | 265 ms | 71 ms | 7 ms |
| charset-normalizer | 100 ms | 50 ms | 5 ms |
_updated as of december 2024 using CPython 3.12_
Chardet's performance on larger file (1MB+) are very poor. Expect huge difference on large payload.
> Stats are generated using 400+ files using default parameters. More details on used files, see GHA workflows.
> And yes, these results might change at any time. The dataset can be updated to include more files.
> The actual delays heavily depends on your CPU capabilities. The factors should remain the same.
> Keep in mind that the stats are generous and that Chardet accuracy vs our is measured using Chardet initial capability
> (e.g. Supported Encoding) Challenge-them if you want.
## ✨ Installation
Using pip:
```sh
pip install charset-normalizer -U
```
## 🚀 Basic Usage
### CLI
This package comes with a CLI.
```
usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
file [file ...]
The Real First Universal Charset Detector. Discover originating encoding used
on text file. Normalize text to unicode.
positional arguments:
files File(s) to be analysed
optional arguments:
-h, --help show this help message and exit
-v, --verbose Display complementary information about file if any.
Stdout will contain logs about the detection process.
-a, --with-alternative
Output complementary possibilities if any. Top-level
JSON WILL be a list.
-n, --normalize Permit to normalize input file. If not set, program
does not write anything.
-m, --minimal Only output the charset detected to STDOUT. Disabling
JSON output.
-r, --replace Replace file when trying to normalize it instead of
creating a new one.
-f, --force Replace file without asking if you are sure, use this
flag with caution.
-t THRESHOLD, --threshold THRESHOLD
Define a custom maximum amount of chaos allowed in
decoded content. 0. <= chaos <= 1.
--version Show version information and exit.
```
```bash
normalizer ./data/sample.1.fr.srt
```
or
```bash
python -m charset_normalizer ./data/sample.1.fr.srt
```
🎉 Since version 1.4.0 the CLI produce easily usable stdout result in JSON format.
```json
{
"path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
"encoding": "cp1252",
"encoding_aliases": [
"1252",
"windows_1252"
],
"alternative_encodings": [
"cp1254",
"cp1256",
"cp1258",
"iso8859_14",
"iso8859_15",
"iso8859_16",
"iso8859_3",
"iso8859_9",
"latin_1",
"mbcs"
],
"language": "French",
"alphabets": [
"Basic Latin",
"Latin-1 Supplement"
],
"has_sig_or_bom": false,
"chaos": 0.149,
"coherence": 97.152,
"unicode_path": null,
"is_preferred": true
}
```
### Python
*Just print out normalized text*
```python
from charset_normalizer import from_path
results = from_path('./my_subtitle.srt')
print(str(results.best()))
```
*Upgrade your code without effort*
```python
from charset_normalizer import detect
```
The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.
See the docs for advanced usage : [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)
## 😇 Why
When I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a
reliable alternative using a completely different method. Also! I never back down on a good challenge!
I **don't care** about the **originating charset** encoding, because **two different tables** can
produce **two identical rendered string.**
What I want is to get readable text, the best I can.
In a way, **I'm brute forcing text decoding.** How cool is that ? 😎
Don't confuse package **ftfy** with charset-normalizer or chardet. ftfy goal is to repair Unicode string whereas charset-normalizer to convert raw file in unknown encoding to unicode.
## 🍰 How
- Discard all charset encoding table that could not fit the binary content.
- Measure noise, or the mess once opened (by chunks) with a corresponding charset encoding.
- Extract matches with the lowest mess detected.
- Additionally, we measure coherence / probe for a language.
**Wait a minute**, what is noise/mess and coherence according to **YOU ?**
*Noise :* I opened hundred of text files, **written by humans**, with the wrong encoding table. **I observed**, then
**I established** some ground rules about **what is obvious** when **it seems like** a mess (aka. defining noise in rendered text).
I know that my interpretation of what is noise is probably incomplete, feel free to contribute in order to
improve or rewrite it.
*Coherence :* For each language there is on earth, we have computed ranked letter appearance occurrences (the best we can). So I thought
that intel is worth something here. So I use those records against decoded text to check if I can detect intelligent design.
## ⚡ Known limitations
- Language detection is unreliable when text contains two or more languages sharing identical letters. (eg. HTML (english tags) + Turkish content (Sharing Latin characters))
- Every charset detector heavily depends on sufficient content. In common cases, do not bother run detection on very tiny content.
## ⚠️ About Python EOLs
**If you are running:**
- Python >=2.7,<3.5: Unsupported
- Python 3.5: charset-normalizer < 2.1
- Python 3.6: charset-normalizer < 3.1
- Python 3.7: charset-normalizer < 4.0
Upgrade your Python interpreter as soon as possible.
## 👤 Contributing
Contributions, issues and feature requests are very much welcome.<br />
Feel free to check [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.
## 📝 License
Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).<br />
This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.
Characters frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/)
## 💼 For Enterprise
Professional support for charset-normalizer is available as part of the [Tidelift
Subscription][1]. Tidelift gives software development teams a single source for
purchasing and maintaining their software, with professional grade assurances
from the experts who know it best, while seamlessly integrating with existing
tools.
[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme
[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/7297/badge)](https://www.bestpractices.dev/projects/7297)
# Changelog
All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
## [3.4.4](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.4) (2025-10-13)
### Changed
- Bound `setuptools` to a specific constraint `setuptools>=68,<=81`.
- Raised upper bound of mypyc for the optional pre-built extension to v1.18.2
### Removed
- `setuptools-scm` as a build dependency.
### Misc
- Enforced hashes in `dev-requirements.txt` and created `ci-requirements.txt` for security purposes.
- Additional pre-built wheels for riscv64, s390x, and armv7l architectures.
- Restore ` multiple.intoto.jsonl` in GitHub releases in addition to individual attestation file per wheel.
## [3.4.3](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.3) (2025-08-09)
### Changed
- mypy(c) is no longer a required dependency at build time if `CHARSET_NORMALIZER_USE_MYPYC` isn't set to `1`. (#595) (#583)
- automatically lower confidence on small bytes samples that are not Unicode in `detect` output legacy function. (#391)
### Added
- Custom build backend to overcome inability to mark mypy as an optional dependency in the build phase.
- Support for Python 3.14
### Fixed
- sdist archive contained useless directories.
- automatically fallback on valid UTF-16 or UTF-32 even if the md says it's noisy. (#633)
### Misc
- SBOM are automatically published to the relevant GitHub release to comply with regulatory changes.
Each published wheel comes with its SBOM. We choose CycloneDX as the format.
- Prebuilt optimized wheel are no longer distributed by default for CPython 3.7 due to a change in cibuildwheel.
## [3.4.2](https://github.com/Ousret/charset_normalizer/compare/3.4.1...3.4.2) (2025-05-02)
### Fixed
- Addressed the DeprecationWarning in our CLI regarding `argparse.FileType` by backporting the target class into the package. (#591)
- Improved the overall reliability of the detector with CJK Ideographs. (#605) (#587)
### Changed
- Optional mypyc compilation upgraded to version 1.15 for Python >= 3.8
## [3.4.1](https://github.com/Ousret/charset_normalizer/compare/3.4.0...3.4.1) (2024-12-24)
### Changed
- Project metadata are now stored using `pyproject.toml` instead of `setup.cfg` using setuptools as the build backend.
- Enforce annotation delayed loading for a simpler and consistent types in the project.
- Optional mypyc compilation upgraded to version 1.14 for Python >= 3.8
### Added
- pre-commit configuration.
- noxfile.
### Removed
- `build-requirements.txt` as per using `pyproject.toml` native build configuration.
- `bin/integration.py` and `bin/serve.py` in favor of downstream integration test (see noxfile).
- `setup.cfg` in favor of `pyproject.toml` metadata configuration.
- Unused `utils.range_scan` function.
### Fixed
- Converting content to Unicode bytes may insert `utf_8` instead of preferred `utf-8`. (#572)
- Deprecation warning "'count' is passed as positional argument" when converting to Unicode bytes on Python 3.13+
## [3.4.0](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) (2024-10-08)
### Added
- Argument `--no-preemptive` in the CLI to prevent the detector to search for hints.
- Support for Python 3.13 (#512)
### Fixed
- Relax the TypeError exception thrown when trying to compare a CharsetMatch with anything else than a CharsetMatch.
- Improved the general reliability of the detector based on user feedbacks. (#520) (#509) (#498) (#407) (#537)
- Declared charset in content (preemptive detection) not changed when converting to utf-8 bytes. (#381)
## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31)
### Fixed
- Unintentional memory usage regression when using large payload that match several encoding (#376)
- Regression on some detection case showcased in the documentation (#371)
### Added
- Noise (md) probe that identify malformed arabic representation due to the presence of letters in isolated form (credit to my wife)
## [3.3.1](https://github.com/Ousret/charset_normalizer/compare/3.3.0...3.3.1) (2023-10-22)
### Changed
- Optional mypyc compilation upgraded to version 1.6.1 for Python >= 3.8
- Improved the general detection reliability based on reports from the community
## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30)
### Added
- Allow to execute the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer`
- Support for 9 forgotten encoding that are supported by Python but unlisted in `encoding.aliases` as they have no alias (#323)
### Removed
- (internal) Redundant utils.is_ascii function and unused function is_private_use_only
- (internal) charset_normalizer.assets is moved inside charset_normalizer.constant
### Changed
- (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection
- Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.8
### Fixed
- Unable to properly sort CharsetMatch when both chaos/noise and coherence were close due to an unreachable condition in \_\_lt\_\_ (#350)
## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)
### Changed
- Typehint for function `from_path` no longer enforce `PathLike` as its first argument
- Minor improvement over the global detection reliability
### Added
- Introduce function `is_binary` that relies on main capabilities, and optimized to detect binaries
- Propagate `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp` that allow a deeper control over the detection (default True)
- Explicit support for Python 3.12
### Fixed
- Edge case detection failure where a file would contain 'very-long' camel cased word (Issue #289)
## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)
### Added
- Argument `should_rename_legacy` for legacy function `detect` and disregard any new arguments without errors (PR #262)
### Removed
- Support for Python 3.6 (PR #260)
### Changed
- Optional speedup provided by mypy/c 1.0.1
## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)
### Fixed
- Multi-bytes cutter/chunk generator did not always cut correctly (PR #233)
### Changed
- Speedup provided by mypy/c 0.990 on Python >= 3.7
## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)
### Added
- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)
### Changed
- Build with static metadata using 'build' frontend
- Make the language detection stricter
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
### Fixed
- CLI with opt --normalize fail when using full path for files
- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it
- Sphinx warnings when generating the documentation
### Removed
- Coherence detector no longer return 'Simple English' instead return 'English'
- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'
- Breaking: Method `first()` and `best()` from CharsetMatch
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII)
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
- Breaking: Top-level function `normalize`
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
- Support for the backport `unicodedata2`
## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)
### Added
- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
### Changed
- Build with static metadata using 'build' frontend
- Make the language detection stricter
### Fixed
- CLI with opt --normalize fail when using full path for files
- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it
### Removed
- Coherence detector no longer return 'Simple English' instead return 'English'
- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'
## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)
### Added
- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)
### Removed
- Breaking: Method `first()` and `best()` from CharsetMatch
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII)
### Fixed
- Sphinx warnings when generating the documentation
## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)
### Changed
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
### Removed
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
- Breaking: Top-level function `normalize`
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
- Support for the backport `unicodedata2`
## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)
### Deprecated
- Function `normalize` scheduled for removal in 3.0
### Changed
- Removed useless call to decode in fn is_unprintable (#206)
### Fixed
- Third-party library (i18n xgettext) crashing not recognizing utf_8 (PEP 263) with underscore from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)
## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)
### Added
- Output the Unicode table version when running the CLI with `--version` (PR #194)
### Changed
- Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175)
- Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183)
### Fixed
- Workaround potential bug in cpython with Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space (PR #175)
- CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)
### Removed
- Support for Python 3.5 (PR #192)
### Deprecated
- Use of backport unicodedata from `unicodedata2` as Python is quickly catching up, scheduled for removal in 3.0 (PR #194)
## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)
### Fixed
- ASCII miss-detection on rare cases (PR #170)
## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)
### Added
- Explicit support for Python 3.11 (PR #164)
### Changed
- The logging behavior have been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165)
## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)
### Fixed
- Fallback match entries might lead to UnicodeDecodeError for large bytes sequence (PR #154)
### Changed
- Skipping the language-detection (CD) on ASCII (PR #155)
## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)
### Changed
- Moderating the logging impact (since 2.0.8) for specific environments (PR #147)
### Fixed
- Wrong logging level applied when setting kwarg `explain` to True (PR #146)
## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)
### Changed
- Improvement over Vietnamese detection (PR #126)
- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)
- Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122)
- Call `sum()` without an intermediary list, following PEP 289 recommendations, from [@adbar](https://github.com/adbar) (PR #129)
- Code style as refactored by Sourcery-AI (PR #131)
- Minor adjustment on the MD around European words (PR #133)
- Remove and replace SRTs from assets / tests (PR #139)
- Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135)
- Setting kwarg `explain` to True will provisionally add (bounded to the function's lifespan) a specific stream handler (PR #135)
### Fixed
- Fix large (misleading) sequence giving UnicodeDecodeError (PR #137)
- Avoid using chunks that are too insignificant (PR #137)
### Added
- Add and expose function `set_logging_handler` to configure a specific StreamHandler (see the sketch below), from [@nmaynes](https://github.com/nmaynes) (PR #135)
- Add `CHANGELOG.md` entries, format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)
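A short sketch of the new helper (argument order and defaults assumed from the published signature; adjust to your version):

```python
import logging
from charset_normalizer import set_logging_handler

# Attach a StreamHandler to the "charset_normalizer" logger so the
# library's TRACE/DEBUG output becomes visible while debugging.
set_logging_handler("charset_normalizer", level=logging.DEBUG)
```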
## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)
### Added
- Add support for Kazakh (Cyrillic) language detection (PR #109)
### Changed
- Further improved inferring the language from a given single-byte code page (PR #112)
- Try (even if in vain) to leverage PEP 263 when PEP 3120 is not supported (PR #116)
- Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113)
- Various detection improvements (MD+CD) (PR #117)
### Removed
- Remove redundant logging entry about detected language(s) (PR #115)
### Fixed
- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)
## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)
### Fixed
- Unforeseen regression losing backward compatibility with some older minor releases of Python 3.5.x (PR #100)
- Fix CLI crash when using --minimal output in certain cases (PR #103)
### Changed
- Minor improvement to the detection efficiency (less than 1%) (PR #106 #101)
## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)
### Changed
- The project now complies with flake8, mypy, isort and black to ensure better overall quality (PR #81)
- Backward compatibility with v1.x was improved; the old staticmethods are restored (PR #82)
- The Unicode detection is slightly improved (PR #93)
- Added syntactic sugar `__bool__` for the CharsetMatches results list-container (see the sketch below) (PR #91)
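A minimal illustration of that sugar (the byte payload is arbitrary):

```python
from charset_normalizer import from_bytes

results = from_bytes(b"\x9c\x11\x00 deliberately messy bytes")

# CharsetMatches is falsy when no suitable encoding was found.
if not results:
    print("no match")
else:
    print(results.best().encoding)
```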
### Removed
- The project no longer raises a warning on tiny content given for detection; it is simply logged as a warning instead (PR #92)
### Fixed
- In some rare cases, the chunk extractor could cut in the middle of a multi-byte character and mislead the mess detection (PR #95)
- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)
- The MANIFEST.in was not exhaustive (PR #78)
## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)
### Fixed
- The CLI no longer raises an unexpected exception when no encoding has been found (PR #70)
- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)
- The logger could mislead (with explain=True) about detected languages and the impact of one MBCS match (PR #72)
- Submatch factoring could be wrong in rare edge cases (PR #72)
- Multiple files given to the CLI were ignored when publishing results to STDOUT (after the first path) (PR #72)
- Fix line endings from CRLF to LF for certain project files (PR #67)
### Changed
- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)
- Allow fallback on specified encoding if any (PR #71)
## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)
### Changed
- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results, especially for ASCII. (PR #63)
- According to the community wishes, the detection will fall back on ASCII or UTF-8 in a last-resort case. (PR #64)
## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)
### Fixed
- Fixed mis-detection of empty/too-small JSON payloads. Report from [@tseaver](https://github.com/tseaver) (PR #59)
### Changed
- Don't inject unicodedata2 into sys.modules from [@akx](https://github.com/akx) (PR #57)
## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)
### Fixed
- Make it work where there isn't a filesystem available, dropping assets frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)
- Using explain=False permanently disabled the verbose output in the current runtime (PR #47)
- One log entry (language target preemptive) was not shown in logs when using explain=True (PR #47)
- Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52)
### Changed
- The default argument values of the public function `normalize` were not aligned with `from_bytes` (PR #53)
### Added
- You may now use charset aliases in the `cp_isolation` and `cp_exclusion` arguments (see the sketch below) (PR #47)
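A minimal sketch of alias usage (`latin1` and `utf8` are standard aliases of the `latin_1` and `utf_8` codecs):

```python
from charset_normalizer import from_bytes

# Restrict the candidate set using aliases instead of canonical codec names.
results = from_bytes("café".encode("latin_1"), cp_isolation=["latin1", "utf8"])
print(results.best().encoding if results else None)
```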
## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)
### Changed
- 4 to 5 times faster than the previous 1.4.0 release. At least 2x faster than Chardet.
- Emphasis has been placed on UTF-8 detection; it should perform near-instantaneously.
- The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible.
- The detection mechanism has been slightly improved; Turkish content is now detected correctly (most of the time)
- The program has been rewritten to improve readability and maintainability (now using static typing).
- utf_7 detection has been reinstated.
### Removed
- This package no longer requires anything when used with Python 3.5 (dropped cached_property)
- Removed support for these languages: Catalan, Esperanto, Kazakh, Basque, Volapük, Azeri, Galician, Nynorsk, Macedonian, and Serbo-Croatian.
- The exception hook on UnicodeDecodeError has been removed.
### Deprecated
- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0
### Fixed
- The CLI output used the relative path of the file(s); it should be absolute.
## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)
### Fixed
- Logger configuration/usage no longer conflict with others (PR #44)
## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)
### Removed
- Using standard logging instead of the loguru package.
- Dropping the nose test framework in favor of the maintained pytest.
- Chose not to use the dragonmapper package to help with gibberish Chinese/CJK text.
- Require cached_property only for Python 3.5 due to a constraint; dropping it for every other interpreter version.
- Stopped supporting UTF-7 that does not contain a SIG.
- Dropping PrettyTable, replaced with pure JSON output in the CLI.
### Fixed
- The BOM marker in a CharsetNormalizerMatch instance could be False in rare cases, even if obviously present, due to the sub-match factoring process.
- Not searching properly for the BOM when trying the utf32/16 parent codec.
### Changed
- Improving the package's final size by compressing frequencies.json.
- Huge improvement on the largest payloads.
### Added
- CLI now produces JSON consumable output.
- Return ASCII if the given sequences fit, given reasonable confidence.
## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)
### Fixed
- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40)
## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)
### Fixed
- Empty given payload for detection may cause an exception if trying to access the `alphabets` property. (PR #39)
## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)
### Fixed
- The legacy detect function should return UTF-8-SIG if sig is present in the payload. (PR #38)
## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)
### Changed
- Amend the previous release to allow prettytable 2.0 (PR #35)
## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)
### Fixed
- Fix error while using the package with a python pre-release interpreter (PR #33)
### Changed
- Dependencies refactoring, constraints revised.
### Added
- Add python 3.9 and 3.10 to the supported interpreters
MIT License
Copyright (c) 2025 TAHRI Ahmed R.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
View File
@ -0,0 +1,52 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for charset-normalizer
</title>
</head>
<body>
<h1>
Links for charset-normalizer
</h1>
<a href="/charset-normalizer/charset_normalizer-3.4.4-py3-none-any.whl#sha256=7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f" data-requires-python="&gt;=3.7" data-dist-info-metadata="sha256=8d5b94141b62f1d6afd7d60bbd68acb138a155d176a33518e0a28cc3b8dd9014">
charset_normalizer-3.4.4-py3-none-any.whl
</a>
<br />
<a href="/charset-normalizer/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_x86_64.whl#sha256=ebf3e58c7ec8a8bed6d66a75d7fb37b55e5015b03ceae72a8e7c74495551e224" data-requires-python="&gt;=3.7" data-dist-info-metadata="sha256=8d5b94141b62f1d6afd7d60bbd68acb138a155d176a33518e0a28cc3b8dd9014">
charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_x86_64.whl
</a>
<br />
<a href="/charset-normalizer/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl#sha256=840c25fb618a231545cbab0564a799f101b63b9901f2569faecd6b222ac72381" data-requires-python="&gt;=3.7" data-dist-info-metadata="sha256=8d5b94141b62f1d6afd7d60bbd68acb138a155d176a33518e0a28cc3b8dd9014">
charset_normalizer-3.4.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl
</a>
<br />
<a href="/charset-normalizer/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_x86_64.whl#sha256=cc00f04ed596e9dc0da42ed17ac5e596c6ccba999ba6bd92b0e0aef2f170f2d6" data-requires-python="&gt;=3.7" data-dist-info-metadata="sha256=8d5b94141b62f1d6afd7d60bbd68acb138a155d176a33518e0a28cc3b8dd9014">
charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_x86_64.whl
</a>
<br />
<a href="/charset-normalizer/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl#sha256=9d1bb833febdff5c8927f922386db610b49db6e0d4f4ee29601d71e7c2694313" data-requires-python="&gt;=3.7" data-dist-info-metadata="sha256=8d5b94141b62f1d6afd7d60bbd68acb138a155d176a33518e0a28cc3b8dd9014">
charset_normalizer-3.4.4-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl
</a>
<br />
<a href="/charset-normalizer/charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_x86_64.whl#sha256=cb01158d8b88ee68f15949894ccc6712278243d95f344770fa7593fa2d94410c" data-requires-python="&gt;=3.7" data-dist-info-metadata="sha256=8d5b94141b62f1d6afd7d60bbd68acb138a155d176a33518e0a28cc3b8dd9014">
charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_x86_64.whl
</a>
<br />
<a href="/charset-normalizer/charset_normalizer-3.4.4-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl#sha256=4fe7859a4e3e8457458e2ff592f15ccb02f3da787fcd31e0183879c3ad4692a1" data-requires-python="&gt;=3.7" data-dist-info-metadata="sha256=8d5b94141b62f1d6afd7d60bbd68acb138a155d176a33518e0a28cc3b8dd9014">
charset_normalizer-3.4.4-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl
</a>
<br />
<a href="/charset-normalizer/charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_x86_64.whl#sha256=5cb4d72eea50c8868f5288b7f7f33ed276118325c1dfd3957089f6b519e1382a" data-requires-python="&gt;=3.7" data-dist-info-metadata="sha256=47aaaa4790e1bdc8c54ab5bf6e35ee86e979b65a95d41e888c72639919cbb5c3">
charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_x86_64.whl
</a>
<br />
<a href="/charset-normalizer/charset_normalizer-3.4.4-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl#sha256=f155a433c2ec037d4e8df17d18922c3a0d9b3232a396690f17175d2946f0218d" data-requires-python="&gt;=3.7" data-dist-info-metadata="sha256=47aaaa4790e1bdc8c54ab5bf6e35ee86e979b65a95d41e888c72639919cbb5c3">
charset_normalizer-3.4.4-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl
</a>
<br />
</body>
</html>
Binary file not shown.
View File
@ -0,0 +1,84 @@
Metadata-Version: 2.4
Name: click
Version: 8.3.1
Summary: Composable command line interface toolkit
Maintainer-email: Pallets <contact@palletsprojects.com>
Requires-Python: >=3.10
Description-Content-Type: text/markdown
License-Expression: BSD-3-Clause
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Typing :: Typed
License-File: LICENSE.txt
Requires-Dist: colorama; platform_system == 'Windows'
Project-URL: Changes, https://click.palletsprojects.com/page/changes/
Project-URL: Chat, https://discord.gg/pallets
Project-URL: Documentation, https://click.palletsprojects.com/
Project-URL: Donate, https://palletsprojects.com/donate
Project-URL: Source, https://github.com/pallets/click/
<div align="center"><img src="https://raw.githubusercontent.com/pallets/click/refs/heads/stable/docs/_static/click-name.svg" alt="" height="150"></div>
# Click
Click is a Python package for creating beautiful command line interfaces
in a composable way with as little code as necessary. It's the "Command
Line Interface Creation Kit". It's highly configurable but comes with
sensible defaults out of the box.
It aims to make the process of writing command line tools quick and fun
while also preventing any frustration caused by the inability to
implement an intended CLI API.
Click in three points:
- Arbitrary nesting of commands
- Automatic help page generation
- Supports lazy loading of subcommands at runtime
## A Simple Example
```python
import click
@click.command()
@click.option("--count", default=1, help="Number of greetings.")
@click.option("--name", prompt="Your name", help="The person to greet.")
def hello(count, name):
    """Simple program that greets NAME for a total of COUNT times."""
    for _ in range(count):
        click.echo(f"Hello, {name}!")

if __name__ == '__main__':
    hello()
```
```
$ python hello.py --count=3
Your name: Click
Hello, Click!
Hello, Click!
Hello, Click!
```
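The "arbitrary nesting of commands" point above is not covered by the simple example; here is a minimal sketch of a command group (the subcommand names are made up for illustration):

```python
import click

@click.group()
def cli():
    """Toy CLI demonstrating nested commands."""

@cli.command()
def init():
    click.echo("initialized")

@cli.command()
def status():
    click.echo("all good")

if __name__ == "__main__":
    cli()
```

Running `python cli.py --help` then lists `init` and `status` as subcommands, each with its own generated help page.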
## Donate
The Pallets organization develops and supports Click and other popular
packages. In order to grow the community of contributors and users, and
allow the maintainers to devote more time to the projects, [please
donate today][].
[please donate today]: https://palletsprojects.com/donate
## Contributing
See our [detailed contributing documentation][contrib] for many ways to
contribute, including reporting issues, requesting features, asking or answering
questions, and making PRs.
[contrib]: https://palletsprojects.com/contributing/
mirror/click/index.html Normal file
View File
@ -0,0 +1,20 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for click
</title>
</head>
<body>
<h1>
Links for click
</h1>
<a href="/click/click-8.3.1-py3-none-any.whl#sha256=981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6" data-requires-python="&gt;=3.10" data-dist-info-metadata="sha256=5d9781acc004d2085313cf128dfad20ee4b2342a413e9971251d6d6f00fda198">
click-8.3.1-py3-none-any.whl
</a>
<br />
</body>
</html>
Binary file not shown.
View File
@ -0,0 +1,211 @@
Metadata-Version: 2.4
Name: consolekit
Version: 1.9.0
Summary: Additional utilities for click.
Keywords: click,terminal
Author-email: Dominic Davis-Foster <dominic@davis-foster.co.uk>
Requires-Python: >=3.7
Description-Content-Type: text/x-rst
Classifier: Development Status :: 5 - Production/Stable
Classifier: Environment :: Console
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Typing :: Typed
License-File: LICENSE
Requires-Dist: click>=7.1.2
Requires-Dist: colorama>=0.4.3; python_version < "3.10" and platform_system == "Windows"
Requires-Dist: deprecation-alias>=0.1.1
Requires-Dist: domdf-python-tools>=3.8.0
Requires-Dist: mistletoe>=0.7.2
Requires-Dist: typing-extensions!=3.10.0.1,>=3.10.0.0
Requires-Dist: coincidence>=0.1.0 ; extra == "all"
Requires-Dist: psutil>=5.8.0 ; extra == "all"
Requires-Dist: pytest>=6.0.0 ; extra == "all"
Requires-Dist: pytest-regressions>=2.0.2 ; extra == "all"
Requires-Dist: psutil>=5.8.0 ; extra == "terminals"
Requires-Dist: coincidence>=0.1.0 ; extra == "testing"
Requires-Dist: pytest>=6.0.0 ; extra == "testing"
Requires-Dist: pytest-regressions>=2.0.2 ; extra == "testing"
Project-URL: Documentation, https://consolekit.readthedocs.io/en/latest
Project-URL: Homepage, https://github.com/domdfcoding/consolekit
Project-URL: Issue Tracker, https://github.com/domdfcoding/consolekit/issues
Project-URL: Source Code, https://github.com/domdfcoding/consolekit
Provides-Extra: all
Provides-Extra: terminals
Provides-Extra: testing
###########
consolekit
###########
.. start short_desc
**Additional utilities for click.**
.. end short_desc
.. start shields
.. list-table::
:stub-columns: 1
:widths: 10 90
* - Docs
- |docs| |docs_check|
* - Tests
- |actions_linux| |actions_windows| |actions_macos| |coveralls|
* - PyPI
- |pypi-version| |supported-versions| |supported-implementations| |wheel|
* - Anaconda
- |conda-version| |conda-platform|
* - Activity
- |commits-latest| |commits-since| |maintained| |pypi-downloads|
* - QA
- |codefactor| |actions_flake8| |actions_mypy|
* - Other
- |license| |language| |requires|
.. |docs| image:: https://img.shields.io/readthedocs/consolekit/latest?logo=read-the-docs
:target: https://consolekit.readthedocs.io/en/latest
:alt: Documentation Build Status
.. |docs_check| image:: https://github.com/domdfcoding/consolekit/workflows/Docs%20Check/badge.svg
:target: https://github.com/domdfcoding/consolekit/actions?query=workflow%3A%22Docs+Check%22
:alt: Docs Check Status
.. |actions_linux| image:: https://github.com/domdfcoding/consolekit/workflows/Linux/badge.svg
:target: https://github.com/domdfcoding/consolekit/actions?query=workflow%3A%22Linux%22
:alt: Linux Test Status
.. |actions_windows| image:: https://github.com/domdfcoding/consolekit/workflows/Windows/badge.svg
:target: https://github.com/domdfcoding/consolekit/actions?query=workflow%3A%22Windows%22
:alt: Windows Test Status
.. |actions_macos| image:: https://github.com/domdfcoding/consolekit/workflows/macOS/badge.svg
:target: https://github.com/domdfcoding/consolekit/actions?query=workflow%3A%22macOS%22
:alt: macOS Test Status
.. |actions_flake8| image:: https://github.com/domdfcoding/consolekit/workflows/Flake8/badge.svg
:target: https://github.com/domdfcoding/consolekit/actions?query=workflow%3A%22Flake8%22
:alt: Flake8 Status
.. |actions_mypy| image:: https://github.com/domdfcoding/consolekit/workflows/mypy/badge.svg
:target: https://github.com/domdfcoding/consolekit/actions?query=workflow%3A%22mypy%22
:alt: mypy status
.. |requires| image:: https://dependency-dash.repo-helper.uk/github/domdfcoding/consolekit/badge.svg
:target: https://dependency-dash.repo-helper.uk/github/domdfcoding/consolekit/
:alt: Requirements Status
.. |coveralls| image:: https://img.shields.io/coveralls/github/domdfcoding/consolekit/master?logo=coveralls
:target: https://coveralls.io/github/domdfcoding/consolekit?branch=master
:alt: Coverage
.. |codefactor| image:: https://img.shields.io/codefactor/grade/github/domdfcoding/consolekit?logo=codefactor
:target: https://www.codefactor.io/repository/github/domdfcoding/consolekit
:alt: CodeFactor Grade
.. |pypi-version| image:: https://img.shields.io/pypi/v/consolekit
:target: https://pypi.org/project/consolekit/
:alt: PyPI - Package Version
.. |supported-versions| image:: https://img.shields.io/pypi/pyversions/consolekit?logo=python&logoColor=white
:target: https://pypi.org/project/consolekit/
:alt: PyPI - Supported Python Versions
.. |supported-implementations| image:: https://img.shields.io/pypi/implementation/consolekit
:target: https://pypi.org/project/consolekit/
:alt: PyPI - Supported Implementations
.. |wheel| image:: https://img.shields.io/pypi/wheel/consolekit
:target: https://pypi.org/project/consolekit/
:alt: PyPI - Wheel
.. |conda-version| image:: https://img.shields.io/conda/v/domdfcoding/consolekit?logo=anaconda
:target: https://anaconda.org/domdfcoding/consolekit
:alt: Conda - Package Version
.. |conda-platform| image:: https://img.shields.io/conda/pn/domdfcoding/consolekit?label=conda%7Cplatform
:target: https://anaconda.org/domdfcoding/consolekit
:alt: Conda - Platform
.. |license| image:: https://img.shields.io/github/license/domdfcoding/consolekit
:target: https://github.com/domdfcoding/consolekit/blob/master/LICENSE
:alt: License
.. |language| image:: https://img.shields.io/github/languages/top/domdfcoding/consolekit
:alt: GitHub top language
.. |commits-since| image:: https://img.shields.io/github/commits-since/domdfcoding/consolekit/v1.9.0
:target: https://github.com/domdfcoding/consolekit/pulse
:alt: GitHub commits since tagged version
.. |commits-latest| image:: https://img.shields.io/github/last-commit/domdfcoding/consolekit
:target: https://github.com/domdfcoding/consolekit/commit/master
:alt: GitHub last commit
.. |maintained| image:: https://img.shields.io/maintenance/yes/2025
:alt: Maintenance
.. |pypi-downloads| image:: https://img.shields.io/pypi/dm/consolekit
:target: https://pypi.org/project/consolekit/
:alt: PyPI - Downloads
.. end shields
Installation
--------------
.. start installation
``consolekit`` can be installed from PyPI or Anaconda.
To install with ``pip``:
.. code-block:: bash
$ python -m pip install consolekit
To install with ``conda``:
* First add the required channels
.. code-block:: bash
$ conda config --add channels https://conda.anaconda.org/conda-forge
$ conda config --add channels https://conda.anaconda.org/domdfcoding
* Then install
.. code-block:: bash
$ conda install consolekit
.. end installation
Additionally, for better support in terminals,
install `psutil <https://pypi.org/project/psutil/>`_ by specifying the ``terminals`` extra:
.. code-block:: bash
$ python -m pip install consolekit[terminals]
or, if you installed ``consolekit`` through conda:
.. code-block:: bash
$ conda install -c conda-forge psutil
View File
@ -0,0 +1,20 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for consolekit
</title>
</head>
<body>
<h1>
Links for consolekit
</h1>
<a href="/consolekit/consolekit-1.9.0-py3-none-any.whl#sha256=2997e9d18995e024c5338d5f4b20394ec5f962ccefebce1b2a8ffa3bca507cf0" data-requires-python="&gt;=3.7" data-dist-info-metadata="sha256=43c29372fd3df281ba99e1843b2bd52d261bf65d716eeb89c618418488f09ad7">
consolekit-1.9.0-py3-none-any.whl
</a>
<br />
</body>
</html>
View File
@ -0,0 +1,378 @@
Metadata-Version: 2.4
Name: deprecation-alias
Version: 0.4.0
Summary: A wrapper around 'deprecation' providing support for deprecated aliases.
Project-URL: Homepage, https://github.com/domdfcoding/deprecation-alias
Project-URL: Issue Tracker, https://github.com/domdfcoding/deprecation-alias/issues
Project-URL: Source Code, https://github.com/domdfcoding/deprecation-alias
Author-email: Dominic Davis-Foster <dominic@davis-foster.co.uk>
License: Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction, and
distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by the
copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all other
entities that control, are controlled by, or are under common control with
that entity. For the purposes of this definition, "control" means (i) the
power, direct or indirect, to cause the direction or management of such
entity, whether by contract or otherwise, or (ii) ownership of
fifty percent (50%) or more of the outstanding shares, or (iii) beneficial
ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity exercising
permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation source,
and configuration files.
"Object" form shall mean any form resulting from mechanical transformation
or translation of a Source form, including but not limited to compiled
object code, generated documentation, and conversions to
other media types.
"Work" shall mean the work of authorship, whether in Source or Object
form, made available under the License, as indicated by a copyright notice
that is included in or attached to the work (an example is provided in the
Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object form,
that is based on (or derived from) the Work and for which the editorial
revisions, annotations, elaborations, or other modifications represent,
as a whole, an original work of authorship. For the purposes of this
License, Derivative Works shall not include works that remain separable
from, or merely link (or bind by name) to the interfaces of, the Work and
Derivative Works thereof.
"Contribution" shall mean any work of authorship, including the original
version of the Work and any modifications or additions to that Work or
Derivative Works thereof, that is intentionally submitted to Licensor for
inclusion in the Work by the copyright owner or by an individual or
Legal Entity authorized to submit on behalf of the copyright owner.
For the purposes of this definition, "submitted" means any form of
electronic, verbal, or written communication sent to the Licensor or its
representatives, including but not limited to communication on electronic
mailing lists, source code control systems, and issue tracking systems
that are managed by, or on behalf of, the Licensor for the purpose of
discussing and improving the Work, but excluding communication that is
conspicuously marked or otherwise designated in writing by the copyright
owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity on
behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License.
Subject to the terms and conditions of this License, each Contributor
hereby grants to You a perpetual, worldwide, non-exclusive, no-charge,
royalty-free, irrevocable copyright license to reproduce, prepare
Derivative Works of, publicly display, publicly perform, sublicense,
and distribute the Work and such Derivative Works in
Source or Object form.
3. Grant of Patent License.
Subject to the terms and conditions of this License, each Contributor
hereby grants to You a perpetual, worldwide, non-exclusive, no-charge,
royalty-free, irrevocable (except as stated in this section) patent
license to make, have made, use, offer to sell, sell, import, and
otherwise transfer the Work, where such license applies only to those
patent claims licensable by such Contributor that are necessarily
infringed by their Contribution(s) alone or by combination of their
Contribution(s) with the Work to which such Contribution(s) was submitted.
If You institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work or a
Contribution incorporated within the Work constitutes direct or
contributory patent infringement, then any patent licenses granted to
You under this License for that Work shall terminate as of the date such
litigation is filed.
4. Redistribution.
You may reproduce and distribute copies of the Work or Derivative Works
thereof in any medium, with or without modifications, and in Source or
Object form, provided that You meet the following conditions:
1. You must give any other recipients of the Work or Derivative Works a
copy of this License; and
2. You must cause any modified files to carry prominent notices stating
that You changed the files; and
3. You must retain, in the Source form of any Derivative Works that You
distribute, all copyright, patent, trademark, and attribution notices from
the Source form of the Work, excluding those notices that do not pertain
to any part of the Derivative Works; and
4. If the Work includes a "NOTICE" text file as part of its distribution,
then any Derivative Works that You distribute must include a readable copy
of the attribution notices contained within such NOTICE file, excluding
those notices that do not pertain to any part of the Derivative Works,
in at least one of the following places: within a NOTICE text file
distributed as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or, within a
display generated by the Derivative Works, if and wherever such
third-party notices normally appear. The contents of the NOTICE file are
for informational purposes only and do not modify the License.
You may add Your own attribution notices within Derivative Works that You
distribute, alongside or as an addendum to the NOTICE text from the Work,
provided that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and may
provide additional or different license terms and conditions for use,
reproduction, or distribution of Your modifications, or for any such
Derivative Works as a whole, provided Your use, reproduction, and
distribution of the Work otherwise complies with the conditions
stated in this License.
5. Submission of Contributions.
Unless You explicitly state otherwise, any Contribution intentionally
submitted for inclusion in the Work by You to the Licensor shall be under
the terms and conditions of this License, without any additional
terms or conditions. Notwithstanding the above, nothing herein shall
supersede or modify the terms of any separate license agreement you may
have executed with Licensor regarding such Contributions.
6. Trademarks.
This License does not grant permission to use the trade names, trademarks,
service marks, or product names of the Licensor, except as required for
reasonable and customary use in describing the origin of the Work and
reproducing the content of the NOTICE file.
7. Disclaimer of Warranty.
Unless required by applicable law or agreed to in writing, Licensor
provides the Work (and each Contributor provides its Contributions)
on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
either express or implied, including, without limitation, any warranties
or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS
FOR A PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any risks
associated with Your exercise of permissions under this License.
8. Limitation of Liability.
In no event and under no legal theory, whether in tort
(including negligence), contract, or otherwise, unless required by
applicable law (such as deliberate and grossly negligent acts) or agreed
to in writing, shall any Contributor be liable to You for damages,
including any direct, indirect, special, incidental, or consequential
damages of any character arising as a result of this License or out of
the use or inability to use the Work (including but not limited to damages
for loss of goodwill, work stoppage, computer failure or malfunction,
or any and all other commercial damages or losses), even if such
Contributor has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability.
While redistributing the Work or Derivative Works thereof, You may choose
to offer, and charge a fee for, acceptance of support, warranty,
indemnity, or other liability obligations and/or rights consistent with
this License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf of any
other Contributor, and only if You agree to indemnify, defend, and hold
each Contributor harmless for any liability incurred by, or claims
asserted against, such Contributor by reason of your accepting any such
warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work
To apply the Apache License to your work, attach the following boilerplate
notice, with the fields enclosed by brackets "[]" replaced with your own
identifying information. (Don't include the brackets!) The text should be
enclosed in the appropriate comment syntax for the file format. We also
recommend that a file or class name and description of purpose be included
on the same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2020 Dominic Davis-Foster
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
or implied. See the License for the specific language governing
permissions and limitations under the License.
License-File: LICENSE
Keywords: deprecation
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: Apache Software License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Typing :: Typed
Requires-Python: >=3.6.1
Requires-Dist: deprecation>=2.1.0
Requires-Dist: packaging>=20.4
Description-Content-Type: text/x-rst
##################
deprecation-alias
##################
.. start short_desc
**A wrapper around 'deprecation' providing support for deprecated aliases.**
.. end short_desc
.. start shields
.. list-table::
:stub-columns: 1
:widths: 10 90
* - Tests
- |actions_linux| |actions_windows| |actions_macos| |coveralls|
* - PyPI
- |pypi-version| |supported-versions| |supported-implementations| |wheel|
* - Anaconda
- |conda-version| |conda-platform|
* - Activity
- |commits-latest| |commits-since| |maintained| |pypi-downloads|
* - QA
- |codefactor| |actions_flake8| |actions_mypy|
* - Other
- |license| |language| |requires|
.. |actions_linux| image:: https://github.com/domdfcoding/deprecation-alias/workflows/Linux/badge.svg
:target: https://github.com/domdfcoding/deprecation-alias/actions?query=workflow%3A%22Linux%22
:alt: Linux Test Status
.. |actions_windows| image:: https://github.com/domdfcoding/deprecation-alias/workflows/Windows/badge.svg
:target: https://github.com/domdfcoding/deprecation-alias/actions?query=workflow%3A%22Windows%22
:alt: Windows Test Status
.. |actions_macos| image:: https://github.com/domdfcoding/deprecation-alias/workflows/macOS/badge.svg
:target: https://github.com/domdfcoding/deprecation-alias/actions?query=workflow%3A%22macOS%22
:alt: macOS Test Status
.. |actions_flake8| image:: https://github.com/domdfcoding/deprecation-alias/workflows/Flake8/badge.svg
:target: https://github.com/domdfcoding/deprecation-alias/actions?query=workflow%3A%22Flake8%22
:alt: Flake8 Status
.. |actions_mypy| image:: https://github.com/domdfcoding/deprecation-alias/workflows/mypy/badge.svg
:target: https://github.com/domdfcoding/deprecation-alias/actions?query=workflow%3A%22mypy%22
:alt: mypy status
.. |requires| image:: https://dependency-dash.repo-helper.uk/github/domdfcoding/deprecation-alias/badge.svg
:target: https://dependency-dash.repo-helper.uk/github/domdfcoding/deprecation-alias/
:alt: Requirements Status
.. |coveralls| image:: https://img.shields.io/coveralls/github/domdfcoding/deprecation-alias/master?logo=coveralls
:target: https://coveralls.io/github/domdfcoding/deprecation-alias?branch=master
:alt: Coverage
.. |codefactor| image:: https://img.shields.io/codefactor/grade/github/domdfcoding/deprecation-alias?logo=codefactor
:target: https://www.codefactor.io/repository/github/domdfcoding/deprecation-alias
:alt: CodeFactor Grade
.. |pypi-version| image:: https://img.shields.io/pypi/v/deprecation-alias
:target: https://pypi.org/project/deprecation-alias/
:alt: PyPI - Package Version
.. |supported-versions| image:: https://img.shields.io/pypi/pyversions/deprecation-alias?logo=python&logoColor=white
:target: https://pypi.org/project/deprecation-alias/
:alt: PyPI - Supported Python Versions
.. |supported-implementations| image:: https://img.shields.io/pypi/implementation/deprecation-alias
:target: https://pypi.org/project/deprecation-alias/
:alt: PyPI - Supported Implementations
.. |wheel| image:: https://img.shields.io/pypi/wheel/deprecation-alias
:target: https://pypi.org/project/deprecation-alias/
:alt: PyPI - Wheel
.. |conda-version| image:: https://img.shields.io/conda/v/domdfcoding/deprecation-alias?logo=anaconda
:target: https://anaconda.org/domdfcoding/deprecation-alias
:alt: Conda - Package Version
.. |conda-platform| image:: https://img.shields.io/conda/pn/domdfcoding/deprecation-alias?label=conda%7Cplatform
:target: https://anaconda.org/domdfcoding/deprecation-alias
:alt: Conda - Platform
.. |license| image:: https://img.shields.io/github/license/domdfcoding/deprecation-alias
:target: https://github.com/domdfcoding/deprecation-alias/blob/master/LICENSE
:alt: License
.. |language| image:: https://img.shields.io/github/languages/top/domdfcoding/deprecation-alias
:alt: GitHub top language
.. |commits-since| image:: https://img.shields.io/github/commits-since/domdfcoding/deprecation-alias/v0.4.0
:target: https://github.com/domdfcoding/deprecation-alias/pulse
:alt: GitHub commits since tagged version
.. |commits-latest| image:: https://img.shields.io/github/last-commit/domdfcoding/deprecation-alias
:target: https://github.com/domdfcoding/deprecation-alias/commit/master
:alt: GitHub last commit
.. |maintained| image:: https://img.shields.io/maintenance/yes/2025
:alt: Maintenance
.. |pypi-downloads| image:: https://img.shields.io/pypi/dm/deprecation-alias
:target: https://pypi.org/project/deprecation-alias/
:alt: PyPI - Downloads
.. end shields
Installation
--------------
.. start installation
``deprecation-alias`` can be installed from PyPI or Anaconda.
To install with ``pip``:
.. code-block:: bash
$ python -m pip install deprecation-alias
To install with ``conda``:
* First add the required channels
.. code-block:: bash
$ conda config --add channels https://conda.anaconda.org/conda-forge
$ conda config --add channels https://conda.anaconda.org/domdfcoding
* Then install
.. code-block:: bash
$ conda install deprecation-alias
.. end installation
View File
@ -0,0 +1,20 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for deprecation-alias
</title>
</head>
<body>
<h1>
Links for deprecation-alias
</h1>
<a href="/deprecation-alias/deprecation_alias-0.4.0-py3-none-any.whl#sha256=a2d3cb08705d81bcc845ebbeff8e981ca7e5ef6c51478f0c771c38a54d7d7811" data-requires-python="&gt;=3.6.1" data-dist-info-metadata="sha256=2309e22f77ba5cc3b021d68ff4836d53eb1f2c40d4b46d8414b3f49a7b5a7c83">
deprecation_alias-0.4.0-py3-none-any.whl
</a>
<br />
</body>
</html>
View File
@ -0,0 +1,114 @@
Metadata-Version: 2.1
Name: deprecation
Version: 2.1.0
Summary: A library to handle automated deprecations
Home-page: http://deprecation.readthedocs.io/
Author: Brian Curtin
Author-email: brian@python.org
Maintainer: Brian Curtin
Maintainer-email: brian@python.org
License: Apache 2
Project-URL: Documentation, http://deprecation.readthedocs.io/en/latest/
Project-URL: Source, https://github.com/briancurtin/deprecation
Project-URL: Bug Tracker, https://github.com/briancurtin/deprecation/issues
Keywords: deprecation
Platform: UNKNOWN
Classifier: Development Status :: 5 - Production/Stable
Classifier: License :: OSI Approved :: Apache Software License
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 2
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.5
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Requires-Dist: packaging
deprecation
===========
.. image:: https://readthedocs.org/projects/deprecation/badge/?version=latest
:target: http://deprecation.readthedocs.io/en/latest/
:alt: Documentation Status
.. image:: https://travis-ci.org/briancurtin/deprecation.svg?branch=master
:target: https://travis-ci.org/briancurtin/deprecation
.. image:: https://codecov.io/gh/briancurtin/deprecation/branch/master/graph/badge.svg
:target: https://codecov.io/gh/briancurtin/deprecation
The ``deprecation`` library provides a ``deprecated`` decorator and a
``fail_if_not_removed`` decorator for your tests. Together, the two
enable the automation of several things:
1. The docstring of a deprecated method gets the deprecation details
appended to the end of it. If you generate your API docs direct
from your source, you don't need to worry about writing your own
notification. You also don't need to worry about forgetting to
write it. It's done for you.
2. Rather than having code live on forever because you only deprecated
it but never actually moved on from it, you can have your tests
tell you when it's time to remove the code. The ``@deprecated``
decorator can be told when it's time to entirely remove the code,
which causes ``@fail_if_not_removed`` to raise an ``AssertionError``,
causing either your unittest or py.test tests to fail.
See http://deprecation.readthedocs.io/ for the full documentation.
Installation
============
::
pip install deprecation
Usage
=====
::
    import deprecation

    @deprecation.deprecated(deprecated_in="1.0", removed_in="2.0",
                            current_version=__version__,
                            details="Use the bar function instead")
    def foo():
        """Do some stuff"""
        return 1
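A companion sketch of the test-side decorator described above (module and test names are hypothetical)::

    import unittest

    import deprecation
    from mymodule import foo  # the deprecated function from the example

    class FooTests(unittest.TestCase):

        # Fails with an AssertionError once current_version reaches
        # the removed_in version declared on foo.
        @deprecation.fail_if_not_removed
        def test_foo(self):
            self.assertEqual(foo(), 1)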
...but doesn't Python ignore ``DeprecationWarning``?
====================================================
Yes, by default since 2.7—and for good reason [#]_ —and this works fine
with that.
1. It often makes sense for you to run your tests with a ``-W`` flag or
the ``PYTHONWARNINGS`` environment variable so you catch warnings
in development and handle them appropriately. The warnings raised by
this library show up there, as they're subclasses of the built-in
``DeprecationWarning``. See the `Command Line
<https://docs.python.org/2/using/cmdline.html#cmdoption-W>`_
and `Environment Variable
<https://docs.python.org/2/using/cmdline.html#envvar-PYTHONWARNINGS>`_
documentation for more details.
2. Even if you don't enable those things, the behavior of this library
remains the same. The docstrings will still be updated and the tests
will still fail when they need to. You'll get the benefits regardless
of what Python cares about ``DeprecationWarning``.
----
.. [#] Exposing application users to ``DeprecationWarning``\s that are
emitted by lower-level code needlessly involves end-users in
"how things are done." It often leads to users raising issues
about warnings they're presented, which on one hand is done
rightfully so, as it's been presented to them as some sort of
issue to resolve. However, at the same time, the warning could
be well known and planned for. From either side, loud
``DeprecationWarning``\s can be seen as noise that isn't
necessary outside of development.
View File
@ -0,0 +1,20 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for deprecation
</title>
</head>
<body>
<h1>
Links for deprecation
</h1>
<a href="/deprecation/deprecation-2.1.0-py2.py3-none-any.whl#sha256=a10811591210e1fb0e768a8c25517cabeabcba6f0bf96564f8ff45189f90b14a" data-dist-info-metadata="sha256=02696153ceadfe541087ce7c8d2319319a6860cea07e79cb48a29ee4380653cf">
deprecation-2.1.0-py2.py3-none-any.whl
</a>
<br />
</body>
</html>
Binary file not shown.
View File
@ -0,0 +1,203 @@
Metadata-Version: 2.4
Name: dist-meta
Version: 0.9.0
Summary: Parse and create Python distribution metadata.
Project-URL: Homepage, https://github.com/repo-helper/dist-meta
Project-URL: Issue Tracker, https://github.com/repo-helper/dist-meta/issues
Project-URL: Source Code, https://github.com/repo-helper/dist-meta
Project-URL: Documentation, https://dist-meta.readthedocs.io/en/latest
Author-email: Dominic Davis-Foster <dominic@davis-foster.co.uk>
License: Copyright (c) 2021 Dominic Davis-Foster
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
OR OTHER DEALINGS IN THE SOFTWARE.
License-File: LICENSE
Keywords: dist-info,metadata,packaging,pypi
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: System :: Archiving :: Packaging
Classifier: Typing :: Typed
Requires-Python: >=3.6.1
Requires-Dist: domdf-python-tools>=3.1.0
Requires-Dist: handy-archives>=0.1.0
Requires-Dist: packaging>=20.9
Description-Content-Type: text/x-rst
==========
dist-meta
==========
.. start short_desc
**Parse and create Python distribution metadata.**
.. end short_desc
.. start shields
.. list-table::
:stub-columns: 1
:widths: 10 90
* - Docs
- |docs| |docs_check|
* - Tests
- |actions_linux| |actions_windows| |actions_macos| |coveralls|
* - PyPI
- |pypi-version| |supported-versions| |supported-implementations| |wheel|
* - Anaconda
- |conda-version| |conda-platform|
* - Activity
- |commits-latest| |commits-since| |maintained| |pypi-downloads|
* - QA
- |codefactor| |actions_flake8| |actions_mypy|
* - Other
- |license| |language| |requires|
.. |docs| image:: https://img.shields.io/readthedocs/dist-meta/latest?logo=read-the-docs
:target: https://dist-meta.readthedocs.io/en/latest
:alt: Documentation Build Status
.. |docs_check| image:: https://github.com/repo-helper/dist-meta/workflows/Docs%20Check/badge.svg
:target: https://github.com/repo-helper/dist-meta/actions?query=workflow%3A%22Docs+Check%22
:alt: Docs Check Status
.. |actions_linux| image:: https://github.com/repo-helper/dist-meta/workflows/Linux/badge.svg
:target: https://github.com/repo-helper/dist-meta/actions?query=workflow%3A%22Linux%22
:alt: Linux Test Status
.. |actions_windows| image:: https://github.com/repo-helper/dist-meta/workflows/Windows/badge.svg
:target: https://github.com/repo-helper/dist-meta/actions?query=workflow%3A%22Windows%22
:alt: Windows Test Status
.. |actions_macos| image:: https://github.com/repo-helper/dist-meta/workflows/macOS/badge.svg
:target: https://github.com/repo-helper/dist-meta/actions?query=workflow%3A%22macOS%22
:alt: macOS Test Status
.. |actions_flake8| image:: https://github.com/repo-helper/dist-meta/workflows/Flake8/badge.svg
:target: https://github.com/repo-helper/dist-meta/actions?query=workflow%3A%22Flake8%22
:alt: Flake8 Status
.. |actions_mypy| image:: https://github.com/repo-helper/dist-meta/workflows/mypy/badge.svg
:target: https://github.com/repo-helper/dist-meta/actions?query=workflow%3A%22mypy%22
:alt: mypy status
.. |requires| image:: https://dependency-dash.repo-helper.uk/github/repo-helper/dist-meta/badge.svg
:target: https://dependency-dash.repo-helper.uk/github/repo-helper/dist-meta/
:alt: Requirements Status
.. |coveralls| image:: https://img.shields.io/coveralls/github/repo-helper/dist-meta/master?logo=coveralls
:target: https://coveralls.io/github/repo-helper/dist-meta?branch=master
:alt: Coverage
.. |codefactor| image:: https://img.shields.io/codefactor/grade/github/repo-helper/dist-meta?logo=codefactor
:target: https://www.codefactor.io/repository/github/repo-helper/dist-meta
:alt: CodeFactor Grade
.. |pypi-version| image:: https://img.shields.io/pypi/v/dist-meta
:target: https://pypi.org/project/dist-meta/
:alt: PyPI - Package Version
.. |supported-versions| image:: https://img.shields.io/pypi/pyversions/dist-meta?logo=python&logoColor=white
:target: https://pypi.org/project/dist-meta/
:alt: PyPI - Supported Python Versions
.. |supported-implementations| image:: https://img.shields.io/pypi/implementation/dist-meta
:target: https://pypi.org/project/dist-meta/
:alt: PyPI - Supported Implementations
.. |wheel| image:: https://img.shields.io/pypi/wheel/dist-meta
:target: https://pypi.org/project/dist-meta/
:alt: PyPI - Wheel
.. |conda-version| image:: https://img.shields.io/conda/v/domdfcoding/dist-meta?logo=anaconda
:target: https://anaconda.org/domdfcoding/dist-meta
:alt: Conda - Package Version
.. |conda-platform| image:: https://img.shields.io/conda/pn/domdfcoding/dist-meta?label=conda%7Cplatform
:target: https://anaconda.org/domdfcoding/dist-meta
:alt: Conda - Platform
.. |license| image:: https://img.shields.io/github/license/repo-helper/dist-meta
:target: https://github.com/repo-helper/dist-meta/blob/master/LICENSE
:alt: License
.. |language| image:: https://img.shields.io/github/languages/top/repo-helper/dist-meta
:alt: GitHub top language
.. |commits-since| image:: https://img.shields.io/github/commits-since/repo-helper/dist-meta/v0.9.0
:target: https://github.com/repo-helper/dist-meta/pulse
:alt: GitHub commits since tagged version
.. |commits-latest| image:: https://img.shields.io/github/last-commit/repo-helper/dist-meta
:target: https://github.com/repo-helper/dist-meta/commit/master
:alt: GitHub last commit
.. |maintained| image:: https://img.shields.io/maintenance/yes/2025
:alt: Maintenance
.. |pypi-downloads| image:: https://img.shields.io/pypi/dm/dist-meta
:target: https://pypi.org/project/dist-meta/
:alt: PyPI - Downloads
.. end shields
Installation
--------------
.. start installation
``dist-meta`` can be installed from PyPI or Anaconda.
To install with ``pip``:
.. code-block:: bash
$ python -m pip install dist-meta
To install with ``conda``:
* First add the required channels
.. code-block:: bash
$ conda config --add channels https://conda.anaconda.org/conda-forge
$ conda config --add channels https://conda.anaconda.org/domdfcoding
* Then install
.. code-block:: bash
$ conda install dist-meta
.. end installation
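As a hedged sketch of what the library is for, reading the metadata of an installed distribution (the ``distributions.get_distribution`` and ``get_metadata`` names are taken from the project documentation and should be treated as assumptions)::

    from dist_meta import distributions

    # Locate an installed distribution and parse its METADATA file.
    dist = distributions.get_distribution("click")
    meta = dist.get_metadata()
    print(meta["Name"], meta["Version"])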
View File
@ -0,0 +1,20 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for dist-meta
</title>
</head>
<body>
<h1>
Links for dist-meta
</h1>
<a href="/dist-meta/dist_meta-0.9.0-py3-none-any.whl#sha256=1d38a7f5e83ab2fc5e1fb9af92d995ec27718377d000c7318367db85a2ba07db" data-requires-python="&gt;=3.6.1" data-dist-info-metadata="sha256=07d929464ad91284dfa37ce8370376146451918ba6a0af118181cb382a9dcd2a">
dist_meta-0.9.0-py3-none-any.whl
</a>
<br />
</body>
</html>
Binary file not shown.
View File
@ -0,0 +1,183 @@
Metadata-Version: 2.4
Name: dom_toml
Version: 2.1.0
Summary: Dom's tools for Tom's Obvious, Minimal Language.
Keywords: configuration,serialize,toml
Author-email: Dominic Davis-Foster <dominic@davis-foster.co.uk>
Requires-Python: >=3.7
Description-Content-Type: text/x-rst
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Typing :: Typed
License-File: LICENSE
Requires-Dist: domdf-python-tools>=2.8.0
Requires-Dist: tomli>=1.2.3; python_version < "3.11"
Project-URL: Documentation, https://dom-toml.readthedocs.io/en/latest
Project-URL: Homepage, https://github.com/domdfcoding/dom_toml
Project-URL: Issue Tracker, https://github.com/domdfcoding/dom_toml/issues
Project-URL: Source Code, https://github.com/domdfcoding/dom_toml
#########
dom_toml
#########
.. start short_desc
**Dom's tools for Tom's Obvious, Minimal Language.**
.. end short_desc
.. start shields
.. list-table::
:stub-columns: 1
:widths: 10 90
* - Docs
- |docs| |docs_check|
* - Tests
- |actions_linux| |actions_windows| |actions_macos| |coveralls|
* - PyPI
- |pypi-version| |supported-versions| |supported-implementations| |wheel|
* - Anaconda
- |conda-version| |conda-platform|
* - Activity
- |commits-latest| |commits-since| |maintained| |pypi-downloads|
* - QA
- |codefactor| |actions_flake8| |actions_mypy|
* - Other
- |license| |language| |requires|
.. |docs| image:: https://img.shields.io/readthedocs/dom-toml/latest?logo=read-the-docs
:target: https://dom-toml.readthedocs.io/en/latest
:alt: Documentation Build Status
.. |docs_check| image:: https://github.com/domdfcoding/dom_toml/workflows/Docs%20Check/badge.svg
:target: https://github.com/domdfcoding/dom_toml/actions?query=workflow%3A%22Docs+Check%22
:alt: Docs Check Status
.. |actions_linux| image:: https://github.com/domdfcoding/dom_toml/workflows/Linux/badge.svg
:target: https://github.com/domdfcoding/dom_toml/actions?query=workflow%3A%22Linux%22
:alt: Linux Test Status
.. |actions_windows| image:: https://github.com/domdfcoding/dom_toml/workflows/Windows/badge.svg
:target: https://github.com/domdfcoding/dom_toml/actions?query=workflow%3A%22Windows%22
:alt: Windows Test Status
.. |actions_macos| image:: https://github.com/domdfcoding/dom_toml/workflows/macOS/badge.svg
:target: https://github.com/domdfcoding/dom_toml/actions?query=workflow%3A%22macOS%22
:alt: macOS Test Status
.. |actions_flake8| image:: https://github.com/domdfcoding/dom_toml/workflows/Flake8/badge.svg
:target: https://github.com/domdfcoding/dom_toml/actions?query=workflow%3A%22Flake8%22
:alt: Flake8 Status
.. |actions_mypy| image:: https://github.com/domdfcoding/dom_toml/workflows/mypy/badge.svg
:target: https://github.com/domdfcoding/dom_toml/actions?query=workflow%3A%22mypy%22
:alt: mypy status
.. |requires| image:: https://dependency-dash.repo-helper.uk/github/domdfcoding/dom_toml/badge.svg
:target: https://dependency-dash.repo-helper.uk/github/domdfcoding/dom_toml/
:alt: Requirements Status
.. |coveralls| image:: https://img.shields.io/coveralls/github/domdfcoding/dom_toml/master?logo=coveralls
:target: https://coveralls.io/github/domdfcoding/dom_toml?branch=master
:alt: Coverage
.. |codefactor| image:: https://img.shields.io/codefactor/grade/github/domdfcoding/dom_toml?logo=codefactor
:target: https://www.codefactor.io/repository/github/domdfcoding/dom_toml
:alt: CodeFactor Grade
.. |pypi-version| image:: https://img.shields.io/pypi/v/dom_toml
:target: https://pypi.org/project/dom_toml/
:alt: PyPI - Package Version
.. |supported-versions| image:: https://img.shields.io/pypi/pyversions/dom_toml?logo=python&logoColor=white
:target: https://pypi.org/project/dom_toml/
:alt: PyPI - Supported Python Versions
.. |supported-implementations| image:: https://img.shields.io/pypi/implementation/dom_toml
:target: https://pypi.org/project/dom_toml/
:alt: PyPI - Supported Implementations
.. |wheel| image:: https://img.shields.io/pypi/wheel/dom_toml
:target: https://pypi.org/project/dom_toml/
:alt: PyPI - Wheel
.. |conda-version| image:: https://img.shields.io/conda/v/domdfcoding/dom_toml?logo=anaconda
:target: https://anaconda.org/domdfcoding/dom_toml
:alt: Conda - Package Version
.. |conda-platform| image:: https://img.shields.io/conda/pn/domdfcoding/dom_toml?label=conda%7Cplatform
:target: https://anaconda.org/domdfcoding/dom_toml
:alt: Conda - Platform
.. |license| image:: https://img.shields.io/github/license/domdfcoding/dom_toml
:target: https://github.com/domdfcoding/dom_toml/blob/master/LICENSE
:alt: License
.. |language| image:: https://img.shields.io/github/languages/top/domdfcoding/dom_toml
:alt: GitHub top language
.. |commits-since| image:: https://img.shields.io/github/commits-since/domdfcoding/dom_toml/v2.1.0
:target: https://github.com/domdfcoding/dom_toml/pulse
:alt: GitHub commits since tagged version
.. |commits-latest| image:: https://img.shields.io/github/last-commit/domdfcoding/dom_toml
:target: https://github.com/domdfcoding/dom_toml/commit/master
:alt: GitHub last commit
.. |maintained| image:: https://img.shields.io/maintenance/yes/2025
:alt: Maintenance
.. |pypi-downloads| image:: https://img.shields.io/pypi/dm/dom_toml
:target: https://pypi.org/project/dom_toml/
:alt: PyPI - Downloads
.. end shields
Installation
--------------
.. start installation
``dom_toml`` can be installed from PyPI or Anaconda.
To install with ``pip``:
.. code-block:: bash
$ python -m pip install dom_toml
To install with ``conda``:
* First add the required channels
.. code-block:: bash
$ conda config --add channels https://conda.anaconda.org/conda-forge
$ conda config --add channels https://conda.anaconda.org/domdfcoding
* Then install
.. code-block:: bash
$ conda install dom_toml
.. end installation
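Once installed, reading and writing TOML is a one-liner each way. A minimal
sketch, assuming the top-level ``load``/``dump`` helpers described in the
``dom_toml`` documentation (the filenames here are illustrative):

.. code-block:: python

    # Sketch: round-trip a TOML file with dom_toml.
    import dom_toml

    config = dom_toml.load("pyproject.toml")  # parse the file into a dict
    config.setdefault("tool", {})["example"] = {"enabled": True}
    dom_toml.dump(config, "pyproject.toml")  # write the dict back out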

View File

@ -0,0 +1,20 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for dom-toml
</title>
</head>
<body>
<h1>
Links for dom-toml
</h1>
<a href="/dom-toml/dom_toml-2.1.0-py3-none-any.whl#sha256=f5c4921940ad34b9e95d391f44000e001688b400fd21afb5cb430e8c29172da1" data-requires-python="&gt;=3.7" data-dist-info-metadata="sha256=b4e42073be586c35660e3e9b398fef73d11ad42d2a429237e55003ba088436d6">
dom_toml-2.1.0-py3-none-any.whl
</a>
<br />
</body>
</html>

View File

@ -0,0 +1,209 @@
Metadata-Version: 2.4
Name: domdf_python_tools
Version: 3.10.0
Summary: Helpful functions for Python🐍🛠
Project-URL: Homepage, https://github.com/domdfcoding/domdf_python_tools
Project-URL: Issue Tracker, https://github.com/domdfcoding/domdf_python_tools/issues
Project-URL: Source Code, https://github.com/domdfcoding/domdf_python_tools
Project-URL: Documentation, https://domdf-python-tools.readthedocs.io/en/latest
Author-email: Dominic Davis-Foster <dominic@davis-foster.co.uk>
License: Copyright (c) 2019-2022 Dominic Davis-Foster
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
OR OTHER DEALINGS IN THE SOFTWARE.
License-File: LICENSE
Keywords: utilities
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Typing :: Typed
Requires-Python: >=3.6
Requires-Dist: importlib-metadata>=3.6.0; python_version < '3.9'
Requires-Dist: importlib-resources>=3.0.0; python_version < '3.9'
Requires-Dist: natsort>=7.0.1
Requires-Dist: typing-extensions>=3.7.4.1
Provides-Extra: all
Requires-Dist: pytz>=2019.1; extra == 'all'
Provides-Extra: dates
Requires-Dist: pytz>=2019.1; extra == 'dates'
Provides-Extra: testing
Description-Content-Type: text/x-rst
=====================
domdf_python_tools
=====================
.. start short_desc
**Helpful functions for Python🐍🛠**
.. end short_desc
.. start shields
.. list-table::
:stub-columns: 1
:widths: 10 90
* - Docs
- |docs| |docs_check|
* - Tests
- |actions_linux| |actions_windows| |actions_macos| |coveralls|
* - PyPI
- |pypi-version| |supported-versions| |supported-implementations| |wheel|
* - Anaconda
- |conda-version| |conda-platform|
* - Activity
- |commits-latest| |commits-since| |maintained| |pypi-downloads|
* - QA
- |codefactor| |actions_flake8| |actions_mypy|
* - Other
- |license| |language| |requires|
.. |docs| image:: https://img.shields.io/readthedocs/domdf-python-tools/latest?logo=read-the-docs
:target: https://domdf-python-tools.readthedocs.io/en/latest
:alt: Documentation Build Status
.. |docs_check| image:: https://github.com/domdfcoding/domdf_python_tools/workflows/Docs%20Check/badge.svg
:target: https://github.com/domdfcoding/domdf_python_tools/actions?query=workflow%3A%22Docs+Check%22
:alt: Docs Check Status
.. |actions_linux| image:: https://github.com/domdfcoding/domdf_python_tools/workflows/Linux/badge.svg
:target: https://github.com/domdfcoding/domdf_python_tools/actions?query=workflow%3A%22Linux%22
:alt: Linux Test Status
.. |actions_windows| image:: https://github.com/domdfcoding/domdf_python_tools/workflows/Windows/badge.svg
:target: https://github.com/domdfcoding/domdf_python_tools/actions?query=workflow%3A%22Windows%22
:alt: Windows Test Status
.. |actions_macos| image:: https://github.com/domdfcoding/domdf_python_tools/workflows/macOS/badge.svg
:target: https://github.com/domdfcoding/domdf_python_tools/actions?query=workflow%3A%22macOS%22
:alt: macOS Test Status
.. |actions_flake8| image:: https://github.com/domdfcoding/domdf_python_tools/workflows/Flake8/badge.svg
:target: https://github.com/domdfcoding/domdf_python_tools/actions?query=workflow%3A%22Flake8%22
:alt: Flake8 Status
.. |actions_mypy| image:: https://github.com/domdfcoding/domdf_python_tools/workflows/mypy/badge.svg
:target: https://github.com/domdfcoding/domdf_python_tools/actions?query=workflow%3A%22mypy%22
:alt: mypy status
.. |requires| image:: https://dependency-dash.repo-helper.uk/github/domdfcoding/domdf_python_tools/badge.svg
:target: https://dependency-dash.repo-helper.uk/github/domdfcoding/domdf_python_tools/
:alt: Requirements Status
.. |coveralls| image:: https://img.shields.io/coveralls/github/domdfcoding/domdf_python_tools/master?logo=coveralls
:target: https://coveralls.io/github/domdfcoding/domdf_python_tools?branch=master
:alt: Coverage
.. |codefactor| image:: https://img.shields.io/codefactor/grade/github/domdfcoding/domdf_python_tools?logo=codefactor
:target: https://www.codefactor.io/repository/github/domdfcoding/domdf_python_tools
:alt: CodeFactor Grade
.. |pypi-version| image:: https://img.shields.io/pypi/v/domdf_python_tools
:target: https://pypi.org/project/domdf_python_tools/
:alt: PyPI - Package Version
.. |supported-versions| image:: https://img.shields.io/pypi/pyversions/domdf_python_tools?logo=python&logoColor=white
:target: https://pypi.org/project/domdf_python_tools/
:alt: PyPI - Supported Python Versions
.. |supported-implementations| image:: https://img.shields.io/pypi/implementation/domdf_python_tools
:target: https://pypi.org/project/domdf_python_tools/
:alt: PyPI - Supported Implementations
.. |wheel| image:: https://img.shields.io/pypi/wheel/domdf_python_tools
:target: https://pypi.org/project/domdf_python_tools/
:alt: PyPI - Wheel
.. |conda-version| image:: https://img.shields.io/conda/v/domdfcoding/domdf_python_tools?logo=anaconda
:target: https://anaconda.org/domdfcoding/domdf_python_tools
:alt: Conda - Package Version
.. |conda-platform| image:: https://img.shields.io/conda/pn/domdfcoding/domdf_python_tools?label=conda%7Cplatform
:target: https://anaconda.org/domdfcoding/domdf_python_tools
:alt: Conda - Platform
.. |license| image:: https://img.shields.io/github/license/domdfcoding/domdf_python_tools
:target: https://github.com/domdfcoding/domdf_python_tools/blob/master/LICENSE
:alt: License
.. |language| image:: https://img.shields.io/github/languages/top/domdfcoding/domdf_python_tools
:alt: GitHub top language
.. |commits-since| image:: https://img.shields.io/github/commits-since/domdfcoding/domdf_python_tools/v3.10.0
:target: https://github.com/domdfcoding/domdf_python_tools/pulse
:alt: GitHub commits since tagged version
.. |commits-latest| image:: https://img.shields.io/github/last-commit/domdfcoding/domdf_python_tools
:target: https://github.com/domdfcoding/domdf_python_tools/commit/master
:alt: GitHub last commit
.. |maintained| image:: https://img.shields.io/maintenance/yes/2025
:alt: Maintenance
.. |pypi-downloads| image:: https://img.shields.io/pypi/dm/domdf_python_tools
:target: https://pypi.org/project/domdf_python_tools/
:alt: PyPI - Downloads
.. end shields
**Note:** Before version 3 ``domdf_python_tools`` was licensed under the LGPLv3+.
Version 3 and later are licensed under the MIT License.
Installation
--------------
.. start installation
``domdf_python_tools`` can be installed from PyPI or Anaconda.
To install with ``pip``:
.. code-block:: bash
$ python -m pip install domdf_python_tools
To install with ``conda``:
* First add the required channels
.. code-block:: bash
$ conda config --add channels https://conda.anaconda.org/conda-forge
$ conda config --add channels https://conda.anaconda.org/domdfcoding
* Then install
.. code-block:: bash
$ conda install domdf_python_tools
.. end installation
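As a quick taste of the library, ``domdf_python_tools.paths.PathPlus`` is a
``pathlib.Path`` subclass with extra conveniences. The minimal sketch below
uses only behaviour inherited from ``pathlib``, since the extra helper
methods vary between versions; consult the documentation for the full API:

.. code-block:: python

    # Sketch: PathPlus is a drop-in pathlib.Path replacement.
    from domdf_python_tools.paths import PathPlus

    readme = PathPlus("README.rst")
    if readme.is_file():
        print(readme.read_text()[:80])  # first 80 characters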

View File

@ -0,0 +1,20 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for domdf-python-tools
</title>
</head>
<body>
<h1>
Links for domdf-python-tools
</h1>
<a href="/domdf-python-tools/domdf_python_tools-3.10.0-py3-none-any.whl#sha256=5e71c1be71bbcc1f881d690c8984b60e64298ec256903b3147f068bc33090c36" data-requires-python="&gt;=3.6" data-dist-info-metadata="sha256=7e6c54a34f6e96c6dd0659187c87f325e29b3edf2c81e4ba4bac990ca6b70d66">
domdf_python_tools-3.10.0-py3-none-any.whl
</a>
<br />
</body>
</html>

View File

@ -0,0 +1,187 @@
Metadata-Version: 2.1
Name: handy-archives
Version: 0.2.0
Summary: Some handy archive helpers for Python.
Keywords: archive,shutil,tar,zip
Author-email: Dominic Davis-Foster <dominic@davis-foster.co.uk>
Requires-Python: >=3.6.1
Description-Content-Type: text/x-rst
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: System :: Archiving
Classifier: Topic :: System :: Archiving :: Compression
Classifier: Typing :: Typed
Requires-Dist: coincidence>=0.2.0 ; extra == "all"
Requires-Dist: pytest>=6.0.0 ; extra == "all"
Requires-Dist: coincidence>=0.2.0 ; extra == "testing"
Requires-Dist: pytest>=6.0.0 ; extra == "testing"
Project-URL: Documentation, https://handy-archives.readthedocs.io/en/latest
Project-URL: Homepage, https://github.com/domdfcoding/handy-archives
Project-URL: Issue Tracker, https://github.com/domdfcoding/handy-archives/issues
Project-URL: Source Code, https://github.com/domdfcoding/handy-archives
Provides-Extra: all
Provides-Extra: testing
===============
handy-archives
===============
.. start short_desc
**Some handy archive helpers for Python.**
.. end short_desc
.. start shields
.. list-table::
:stub-columns: 1
:widths: 10 90
* - Docs
- |docs| |docs_check|
* - Tests
- |actions_linux| |actions_windows| |actions_macos| |coveralls|
* - PyPI
- |pypi-version| |supported-versions| |supported-implementations| |wheel|
* - Anaconda
- |conda-version| |conda-platform|
* - Activity
- |commits-latest| |commits-since| |maintained| |pypi-downloads|
* - QA
- |codefactor| |actions_flake8| |actions_mypy|
* - Other
- |license| |language| |requires|
.. |docs| image:: https://img.shields.io/readthedocs/handy-archives/latest?logo=read-the-docs
:target: https://handy-archives.readthedocs.io/en/latest
:alt: Documentation Build Status
.. |docs_check| image:: https://github.com/domdfcoding/handy-archives/workflows/Docs%20Check/badge.svg
:target: https://github.com/domdfcoding/handy-archives/actions?query=workflow%3A%22Docs+Check%22
:alt: Docs Check Status
.. |actions_linux| image:: https://github.com/domdfcoding/handy-archives/workflows/Linux/badge.svg
:target: https://github.com/domdfcoding/handy-archives/actions?query=workflow%3A%22Linux%22
:alt: Linux Test Status
.. |actions_windows| image:: https://github.com/domdfcoding/handy-archives/workflows/Windows/badge.svg
:target: https://github.com/domdfcoding/handy-archives/actions?query=workflow%3A%22Windows%22
:alt: Windows Test Status
.. |actions_macos| image:: https://github.com/domdfcoding/handy-archives/workflows/macOS/badge.svg
:target: https://github.com/domdfcoding/handy-archives/actions?query=workflow%3A%22macOS%22
:alt: macOS Test Status
.. |actions_flake8| image:: https://github.com/domdfcoding/handy-archives/workflows/Flake8/badge.svg
:target: https://github.com/domdfcoding/handy-archives/actions?query=workflow%3A%22Flake8%22
:alt: Flake8 Status
.. |actions_mypy| image:: https://github.com/domdfcoding/handy-archives/workflows/mypy/badge.svg
:target: https://github.com/domdfcoding/handy-archives/actions?query=workflow%3A%22mypy%22
:alt: mypy status
.. |requires| image:: https://dependency-dash.repo-helper.uk/github/domdfcoding/handy-archives/badge.svg
:target: https://dependency-dash.repo-helper.uk/github/domdfcoding/handy-archives/
:alt: Requirements Status
.. |coveralls| image:: https://img.shields.io/coveralls/github/domdfcoding/handy-archives/master?logo=coveralls
:target: https://coveralls.io/github/domdfcoding/handy-archives?branch=master
:alt: Coverage
.. |codefactor| image:: https://img.shields.io/codefactor/grade/github/domdfcoding/handy-archives?logo=codefactor
:target: https://www.codefactor.io/repository/github/domdfcoding/handy-archives
:alt: CodeFactor Grade
.. |pypi-version| image:: https://img.shields.io/pypi/v/handy-archives
:target: https://pypi.org/project/handy-archives/
:alt: PyPI - Package Version
.. |supported-versions| image:: https://img.shields.io/pypi/pyversions/handy-archives?logo=python&logoColor=white
:target: https://pypi.org/project/handy-archives/
:alt: PyPI - Supported Python Versions
.. |supported-implementations| image:: https://img.shields.io/pypi/implementation/handy-archives
:target: https://pypi.org/project/handy-archives/
:alt: PyPI - Supported Implementations
.. |wheel| image:: https://img.shields.io/pypi/wheel/handy-archives
:target: https://pypi.org/project/handy-archives/
:alt: PyPI - Wheel
.. |conda-version| image:: https://img.shields.io/conda/v/domdfcoding/handy-archives?logo=anaconda
:target: https://anaconda.org/domdfcoding/handy-archives
:alt: Conda - Package Version
.. |conda-platform| image:: https://img.shields.io/conda/pn/domdfcoding/handy-archives?label=conda%7Cplatform
:target: https://anaconda.org/domdfcoding/handy-archives
:alt: Conda - Platform
.. |license| image:: https://img.shields.io/github/license/domdfcoding/handy-archives
:target: https://github.com/domdfcoding/handy-archives/blob/master/LICENSE
:alt: License
.. |language| image:: https://img.shields.io/github/languages/top/domdfcoding/handy-archives
:alt: GitHub top language
.. |commits-since| image:: https://img.shields.io/github/commits-since/domdfcoding/handy-archives/v0.2.0
:target: https://github.com/domdfcoding/handy-archives/pulse
:alt: GitHub commits since tagged version
.. |commits-latest| image:: https://img.shields.io/github/last-commit/domdfcoding/handy-archives
:target: https://github.com/domdfcoding/handy-archives/commit/master
:alt: GitHub last commit
.. |maintained| image:: https://img.shields.io/maintenance/yes/2023
:alt: Maintenance
.. |pypi-downloads| image:: https://img.shields.io/pypi/dm/handy-archives
:target: https://pypi.org/project/handy-archives/
:alt: PyPI - Downloads
.. end shields
Installation
--------------
.. start installation
``handy-archives`` can be installed from PyPI or Anaconda.
To install with ``pip``:
.. code-block:: bash
$ python -m pip install handy-archives
To install with ``conda``:
* First add the required channels
.. code-block:: bash
$ conda config --add channels https://conda.anaconda.org/conda-forge
$ conda config --add channels https://conda.anaconda.org/domdfcoding
* Then install
.. code-block:: bash
$ conda install handy-archives
.. end installation
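The package provides drop-in subclasses of the standard library's
``tarfile.TarFile`` and ``zipfile.ZipFile`` with extra helpers. A minimal
sketch using only the inherited ``zipfile`` API (the added convenience
methods vary by version, so they are not shown here):

.. code-block:: python

    # Sketch: handy_archives.ZipFile behaves like zipfile.ZipFile.
    from handy_archives import ZipFile

    with ZipFile("example.zip", "w") as archive:
        archive.writestr("hello.txt", "Hello, world!")

    with ZipFile("example.zip") as archive:
        print(archive.read("hello.txt").decode())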

View File

@ -0,0 +1,20 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for handy-archives
</title>
</head>
<body>
<h1>
Links for handy-archives
</h1>
<a href="/handy-archives/handy_archives-0.2.0-py3-none-any.whl#sha256=8495e08f3cd1c452fe65570db1869db07f709149f85c7e9cd8f3f461df436318" data-requires-python="&gt;=3.6.1" data-dist-info-metadata="sha256=1c8ed778cedfb20d025aff1e14f2f5f42de88c881ac96493f0a337be3f48078e">
handy_archives-0.2.0-py3-none-any.whl
</a>
<br />
</body>
</html>

View File

@ -0,0 +1,230 @@
Metadata-Version: 2.1
Name: hatch-requirements-txt
Version: 0.4.1
Summary: Hatchling plugin to read project dependencies from requirements.txt
Project-URL: Homepage, https://github.com/repo-helper/hatch-requirements-txt
Project-URL: Issue Tracker, https://github.com/repo-helper/hatch-requirements-txt/issues
Project-URL: Source Code, https://github.com/repo-helper/hatch-requirements-txt
Author-email: Dominic Davis-Foster <dominic@davis-foster.co.uk>
License: Copyright (c) 2022 Dominic Davis-Foster
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
OR OTHER DEALINGS IN THE SOFTWARE.
License-File: LICENSE
Keywords: dependencies,hatch,requirements
Classifier: Development Status :: 4 - Beta
Classifier: Framework :: Hatch
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: System :: Archiving :: Packaging
Classifier: Typing :: Typed
Requires-Python: >=3.6.1
Requires-Dist: hatchling>=0.21.0
Requires-Dist: packaging>=21.3
Description-Content-Type: text/x-rst
=======================
hatch-requirements-txt
=======================
.. start short_desc
**Hatchling plugin to read project dependencies from requirements.txt**
.. end short_desc
.. list-table::
:stub-columns: 1
:widths: 10 90
* - Tests
- |actions_linux| |actions_windows| |actions_macos| |coveralls|
* - PyPI
- |pypi-version| |supported-versions| |supported-implementations| |wheel|
* - Anaconda
- |conda-version| |conda-platform|
* - Activity
- |commits-latest| |commits-since| |maintained| |pypi-downloads|
* - QA
- |codefactor| |actions_flake8| |actions_mypy|
* - Other
- |hatch| |license| |language| |requires|
.. |actions_linux| image:: https://github.com/repo-helper/hatch-requirements-txt/workflows/Linux/badge.svg
:target: https://github.com/repo-helper/hatch-requirements-txt/actions?query=workflow%3A%22Linux%22
:alt: Linux Test Status
.. |actions_windows| image:: https://github.com/repo-helper/hatch-requirements-txt/workflows/Windows/badge.svg
:target: https://github.com/repo-helper/hatch-requirements-txt/actions?query=workflow%3A%22Windows%22
:alt: Windows Test Status
.. |actions_macos| image:: https://github.com/repo-helper/hatch-requirements-txt/workflows/macOS/badge.svg
:target: https://github.com/repo-helper/hatch-requirements-txt/actions?query=workflow%3A%22macOS%22
:alt: macOS Test Status
.. |actions_flake8| image:: https://github.com/repo-helper/hatch-requirements-txt/workflows/Flake8/badge.svg
:target: https://github.com/repo-helper/hatch-requirements-txt/actions?query=workflow%3A%22Flake8%22
:alt: Flake8 Status
.. |actions_mypy| image:: https://github.com/repo-helper/hatch-requirements-txt/workflows/mypy/badge.svg
:target: https://github.com/repo-helper/hatch-requirements-txt/actions?query=workflow%3A%22mypy%22
:alt: mypy status
.. |requires| image:: https://dependency-dash.repo-helper.uk/github/repo-helper/hatch-requirements-txt/badge.svg
:target: https://dependency-dash.repo-helper.uk/github/repo-helper/hatch-requirements-txt/
:alt: Requirements Status
.. |coveralls| image:: https://img.shields.io/coveralls/github/repo-helper/hatch-requirements-txt/master?logo=coveralls
:target: https://coveralls.io/github/repo-helper/hatch-requirements-txt?branch=master
:alt: Coverage
.. |codefactor| image:: https://img.shields.io/codefactor/grade/github/repo-helper/hatch-requirements-txt?logo=codefactor
:target: https://www.codefactor.io/repository/github/repo-helper/hatch-requirements-txt
:alt: CodeFactor Grade
.. |pypi-version| image:: https://img.shields.io/pypi/v/hatch-requirements-txt
:target: https://pypi.org/project/hatch-requirements-txt/
:alt: PyPI - Package Version
.. |supported-versions| image:: https://img.shields.io/pypi/pyversions/hatch-requirements-txt?logo=python&logoColor=white
:target: https://pypi.org/project/hatch-requirements-txt/
:alt: PyPI - Supported Python Versions
.. |supported-implementations| image:: https://img.shields.io/pypi/implementation/hatch-requirements-txt
:target: https://pypi.org/project/hatch-requirements-txt/
:alt: PyPI - Supported Implementations
.. |wheel| image:: https://img.shields.io/pypi/wheel/hatch-requirements-txt
:target: https://pypi.org/project/hatch-requirements-txt/
:alt: PyPI - Wheel
.. |conda-version| image:: https://img.shields.io/conda/v/conda-forge/hatch-requirements-txt?logo=anaconda
:target: https://anaconda.org/conda-forge/hatch-requirements-txt
:alt: Conda - Package Version
.. |conda-platform| image:: https://img.shields.io/conda/pn/conda-forge/hatch-requirements-txt?label=conda%7Cplatform
:target: https://anaconda.org/conda-forge/hatch-requirements-txt
:alt: Conda - Platform
.. |hatch| image:: https://img.shields.io/badge/%F0%9F%A5%9A-Hatch-4051b5.svg
:target: https://github.com/pypa/hatch
:alt: Hatch project
.. |license| image:: https://img.shields.io/github/license/repo-helper/hatch-requirements-txt
:target: https://github.com/repo-helper/hatch-requirements-txt/blob/master/LICENSE
:alt: License
.. |language| image:: https://img.shields.io/github/languages/top/repo-helper/hatch-requirements-txt
:alt: GitHub top language
.. |commits-since| image:: https://img.shields.io/github/commits-since/repo-helper/hatch-requirements-txt/v0.4.1
:target: https://github.com/repo-helper/hatch-requirements-txt/pulse
:alt: GitHub commits since tagged version
.. |commits-latest| image:: https://img.shields.io/github/last-commit/repo-helper/hatch-requirements-txt
:target: https://github.com/repo-helper/hatch-requirements-txt/commit/master
:alt: GitHub last commit
.. |maintained| image:: https://img.shields.io/maintenance/yes/2023
:alt: Maintenance
.. |pypi-downloads| image:: https://img.shields.io/pypi/dm/hatch-requirements-txt
:target: https://pypi.org/project/hatch-requirements-txt/
:alt: PyPI - Downloads
Usage
========
In your ``pyproject.toml`` make the following changes:
* Append ``hatch-requirements-txt`` to ``build-system.requires``.
* Append ``"dependencies"`` to ``project.dynamic``.
* Add the following table:
.. code-block:: toml
[tool.hatch.metadata.hooks.requirements_txt]
files = ["requirements.txt"]
The resulting ``pyproject.toml`` should look something like:
.. code-block:: toml
[build-system]
requires = ["hatchling", "hatch-requirements-txt"]
build-backend = "hatchling.build"
[project]
name = "my-project"
version = "1.0.0"
dynamic = ["dependencies"]
[tool.hatch.metadata.hooks.requirements_txt]
files = ["requirements.txt"]
You can also define groups of `optional dependencies <https://hatch.pypa.io/latest/config/dependency/#features>`_
(also known as "features") by appending ``optional-dependencies`` to ``project.dynamic`` and adding a table like:
.. code-block:: toml
[tool.hatch.metadata.hooks.requirements_txt.optional-dependencies]
crypto = ["requirements-crypto.txt"]
fastjson = ["requirements-fastjson.txt"]
cli = ["requirements-cli.txt"]
Requirements file format
============================
``hatch-requirements-txt`` only supports a subset of the ``requirements.txt`` format_ supported by ``pip``.
The following are supported:
* Requirement specifiers, per `PEP 508`_.
* Comments, prefixed with a ``#``.
* ``--<option>`` options, both on their own line and after a requirement specifier.
Note however that the options themselves are ignored.
The following are unsupported within ``requirements.txt`` files:
* Editable install commands with the ``-e`` option.
* References to other requirements or constraints files with the ``-r`` or ``-c`` options.
* References to paths on the local filesystem, or URLs.
**TL;DR**
For best compatibility, ensure all lines in your ``requirements.txt`` files
are valid PEP 508 requirements, or comments starting with a ``#``.
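For illustration only, a hypothetical ``requirements.txt`` restricted to the
supported subset described above might look like this (every line is either a
PEP 508 specifier, a comment, or a standalone ``--<option>``):

.. code-block:: text

    # Runtime dependencies (comments are allowed)
    packaging>=21.3
    typing-extensions>=4.0; python_version < "3.11"
    --prefer-binary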
.. _format: https://pip.pypa.io/en/stable/reference/requirements-file-format/
.. _PEP 508: https://peps.python.org/pep-0508/

View File

@ -0,0 +1,20 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for hatch-requirements-txt
</title>
</head>
<body>
<h1>
Links for hatch-requirements-txt
</h1>
<a href="/hatch-requirements-txt/hatch_requirements_txt-0.4.1-py3-none-any.whl#sha256=13c6ab7707cbc0aba128b3626dacae4be7065f1dbafc792461c9f547b3c3ffde" data-requires-python="&gt;=3.6.1" data-dist-info-metadata="sha256=363835e529855acbb3f275b7c7fbb79901e5b4ac2b336a75765b33db5cb6f333">
hatch_requirements_txt-0.4.1-py3-none-any.whl
</a>
<br />
</body>
</html>

Binary file not shown.

View File

@ -1,6 +1,6 @@
Metadata-Version: 2.4
Name: hatchling
-Version: 1.27.0
+Version: 1.28.0
Summary: Modern, extensible Python build backend
Project-URL: Homepage, https://hatch.pypa.io/latest/
Project-URL: Sponsor, https://github.com/sponsors/ofek
@ -13,19 +13,18 @@ License-File: LICENSE.txt
Keywords: build,hatch,packaging
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
-Classifier: License :: OSI Approved :: MIT License
Classifier: Natural Language :: English
Classifier: Operating System :: OS Independent
-Classifier: Programming Language :: Python :: 3.8
-Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Build Tools
Classifier: Topic :: Software Development :: Libraries :: Python Modules
-Requires-Python: >=3.8
+Requires-Python: >=3.10
Requires-Dist: packaging>=24.2
Requires-Dist: pathspec>=0.10.1
Requires-Dist: pluggy>=1.0.0
@ -42,7 +41,7 @@ Description-Content-Type: text/markdown
| | |
| --- | --- |
| Package | [![PyPI - Version](https://img.shields.io/pypi/v/hatchling.svg?logo=pypi&label=PyPI&logoColor=gold)](https://pypi.org/project/hatchling/) [![PyPI - Downloads](https://img.shields.io/pypi/dm/hatchling.svg?color=blue&label=Downloads&logo=pypi&logoColor=gold)](https://pypi.org/project/hatchling/) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/hatchling.svg?logo=python&label=Python&logoColor=gold)](https://pypi.org/project/hatchling/) |
-| Meta | [![Hatch project](https://img.shields.io/badge/%F0%9F%A5%9A-Hatch-4051b5.svg)](https://github.com/pypa/hatch) [![linting - Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/charliermarsh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) [![code style - Black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![types - Mypy](https://img.shields.io/badge/types-Mypy-blue.svg)](https://github.com/python/mypy) [![License - MIT](https://img.shields.io/badge/license-MIT-9400d3.svg)](https://spdx.org/licenses/) [![GitHub Sponsors](https://img.shields.io/github/sponsors/ofek?logo=GitHub%20Sponsors&style=social)](https://github.com/sponsors/ofek) |
+| Meta | [![Hatch project](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/pypa/hatch/master/docs/assets/badge/v0.json)](https://github.com/pypa/hatch) [![linting - Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/charliermarsh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) [![code style - Black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![types - Mypy](https://img.shields.io/badge/types-Mypy-blue.svg)](https://github.com/python/mypy) [![License - MIT](https://img.shields.io/badge/license-MIT-9400d3.svg)](https://spdx.org/licenses/) [![GitHub Sponsors](https://img.shields.io/github/sponsors/ofek?logo=GitHub%20Sponsors&style=social)](https://github.com/sponsors/ofek) |
</div>

View File

@ -12,8 +12,8 @@
<h1>
Links for hatchling
</h1>
-<a href="/hatchling/hatchling-1.27.0-py3-none-any.whl#sha256=d3a2f3567c4f926ea39849cdf924c7e99e6686c9c8e288ae1037c8fa2a5d937b" data-requires-python="&gt;=3.8" data-dist-info-metadata="sha256=a224ea37d2658f75ccf569e61d849ff7e9297cdac2396022278e82a47b9fed3e">
-hatchling-1.27.0-py3-none-any.whl
+<a href="/hatchling/hatchling-1.28.0-py3-none-any.whl#sha256=dc48722b68b3f4bbfa3ff618ca07cdea6750e7d03481289ffa8be1521d18a961" data-requires-python="&gt;=3.10" data-dist-info-metadata="sha256=acd3b755437aa07859ebe3ed9b71d94536fa183cc851bbd95b437ba3fe7e09b6">
+hatchling-1.28.0-py3-none-any.whl
</a>
<br />
</body> </body>

View File

@ -37,6 +37,18 @@
aiosqlite
</a>
<br />
+<a href="/airium/">
+airium
+</a>
+<br />
+<a href="/apeye/">
+apeye
+</a>
+<br />
+<a href="/apeye-core/">
+apeye-core
+</a>
+<br />
<a href="/asyncua/">
asyncua
</a>
<br />
@ -61,18 +73,34 @@
calver
</a>
<br />
+<a href="/certifi/">
+certifi
+</a>
+<br />
<a href="/cffi/">
cffi
</a>
<br />
+<a href="/charset-normalizer/">
+charset-normalizer
+</a>
+<br />
<a href="/choreographer/">
choreographer
</a>
<br />
+<a href="/click/">
+click
+</a>
+<br />
<a href="/cloudpickle/">
cloudpickle
</a>
<br />
+<a href="/consolekit/">
+consolekit
+</a>
+<br />
<a href="/cryptography/">
cryptography
</a>
@ -81,6 +109,26 @@
Cython
</a>
<br />
+<a href="/deprecation/">
+deprecation
+</a>
+<br />
+<a href="/deprecation-alias/">
+deprecation-alias
+</a>
+<br />
+<a href="/dist-meta/">
+dist-meta
+</a>
+<br />
+<a href="/dom-toml/">
+dom_toml
+</a>
+<br />
+<a href="/domdf-python-tools/">
+domdf_python_tools
+</a>
+<br />
<a href="/expandvars/">
expandvars
</a>
@ -109,10 +157,18 @@
greenlet
</a>
<br />
+<a href="/handy-archives/">
+handy-archives
+</a>
+<br />
<a href="/hatch-fancy-pypi-readme/">
hatch-fancy-pypi-readme
</a>
<br />
+<a href="/hatch-requirements-txt/">
+hatch-requirements-txt
+</a>
+<br />
<a href="/hatch-vcs/">
hatch-vcs
</a>
@ -165,10 +221,18 @@
meson-python
</a>
<br />
+<a href="/mistletoe/">
+mistletoe
+</a>
+<br />
<a href="/multidict/">
multidict
</a>
<br />
+<a href="/natsort/">
+natsort
+</a>
+<br />
<a href="/networkx/">
networkx
</a>
@ -205,6 +269,10 @@
pkgconfig
</a>
<br />
+<a href="/platformdirs/">
+platformdirs
+</a>
+<br />
<a href="/pluggy/">
pluggy
</a>
@ -261,6 +329,10 @@
pyproject-metadata
</a>
<br />
+<a href="/pyproject-parser/">
+pyproject-parser
+</a>
+<br />
<a href="/pytest/">
pytest
</a>
@ -285,6 +357,10 @@
redis
</a>
<br />
+<a href="/requests/">
+requests
+</a>
+<br />
<a href="/s3fs/">
s3fs
</a>
@ -329,6 +405,14 @@
shap
</a>
<br />
+<a href="/shippinglabel/">
+shippinglabel
+</a>
+<br />
+<a href="/simple503/">
+simple503
+</a>
+<br />
<a href="/simplejson/">
simplejson
</a>
@ -389,6 +473,10 @@
wheel
</a>
<br />
+<a href="/whey/">
+whey
+</a>
+<br />
<a href="/wrapt/">
wrapt
</a>

View File

@ -0,0 +1,20 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for mistletoe
</title>
</head>
<body>
<h1>
Links for mistletoe
</h1>
<a href="/mistletoe/mistletoe-1.5.0-py3-none-any.whl#sha256=d4e77b991b998c5efe3c4eab4e1b472263b5349688acd50d79bd9a6c317a9df1" data-requires-python="~=3.5" data-dist-info-metadata="sha256=4bcb1dceef05f4cbfde4c90c9c9cd88836dcef7511b70914f63f2245b51c635f">
mistletoe-1.5.0-py3-none-any.whl
</a>
<br />
</body>
</html>

Binary file not shown.

View File

@ -0,0 +1,307 @@
Metadata-Version: 2.4
Name: mistletoe
Version: 1.5.0
Summary: A fast, extensible Markdown parser in pure Python.
Home-page: https://github.com/miyuchina/mistletoe
Author: Mi Yu
Author-email: hello@afteryu.me
License: MIT
Keywords: markdown lexer parser development
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.5
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: Text Processing :: Markup :: Markdown
Requires-Python: ~=3.5
Description-Content-Type: text/markdown
License-File: LICENSE
Dynamic: author
Dynamic: author-email
Dynamic: classifier
Dynamic: description
Dynamic: description-content-type
Dynamic: home-page
Dynamic: keywords
Dynamic: license
Dynamic: license-file
Dynamic: requires-python
Dynamic: summary
<h1>mistletoe<img src='https://cdn.rawgit.com/miyuchina/mistletoe/master/resources/logo.svg' align='right' width='128' height='128'></h1>
[![Build Status][build-badge]][github-actions]
[![Coverage Status][cover-badge]][coveralls]
[![PyPI][pypi-badge]][pypi]
[![is wheel][wheel-badge]][pypi]
mistletoe is a Markdown parser in pure Python,
designed to be fast, spec-compliant and fully customizable.
Apart from being the fastest
CommonMark-compliant Markdown parser implementation in pure Python,
mistletoe also supports easy definitions of custom tokens.
Parsing Markdown into an abstract syntax tree
also allows us to swap out renderers for different output formats,
without touching any of the core components.
Remember to spell mistletoe in lowercase!
Features
--------
* **Fast**:
mistletoe is the fastest implementation of CommonMark in Python.
See the [performance][performance] section for details.
* **Spec-compliant**:
CommonMark is [a useful, high-quality project][oilshell].
mistletoe follows the [CommonMark specification][commonmark]
to resolve ambiguities during parsing.
Outputs are predictable and well-defined.
* **Extensible**:
Strikethrough and tables are supported natively,
and custom block-level and span-level tokens can easily be added.
Writing a new renderer for mistletoe is a relatively
trivial task.
You can even write [a Lisp][scheme] in it.
Output formats
--------------
Renderers for the following "core" output formats exist within the mistletoe
main package:
* HTML
* LaTeX
* AST (Abstract Syntax Tree; handy for debugging the parsing process)
* Markdown (Can be used to reflow the text, or make other types of automated
changes to Markdown documents)
Renderers for the following output formats can be found
in the [contrib][contrib] package:
* HTML with MathJax (_mathjax.py_)
* HTML with code highlighting (using Pygments) (_pygments\_renderer.py_)
* HTML with TOC (for programmatic use) (_toc\_renderer.py_)
* HTML with support for GitHub wiki links (_github\_wiki.py_)
* Jira Markdown (_jira\_renderer.py_)
* XWiki Syntax (_xwiki20\_renderer.py_)
* Scheme (_scheme.py_)
Installation
------------
mistletoe is tested for Python 3.5 and above. Install mistletoe with pip:
```sh
pip3 install mistletoe
```
Alternatively, clone the repo:
```sh
git clone https://github.com/miyuchina/mistletoe.git
cd mistletoe
pip3 install -e .
```
This installs mistletoe in "editable" mode (because of the `-e` option).
That means any changes made to the source code become visible immediately -
Python only creates a link to the specified directory (`.`) instead of
copying the files into the standard packages folder.
See the [contributing][contributing] doc for how to contribute to mistletoe.
Usage
-----
### Usage from Python
Here's how you can use mistletoe in a Python script:
```python
import mistletoe
with open('foo.md', 'r') as fin:
rendered = mistletoe.markdown(fin)
```
`mistletoe.markdown()` uses mistletoe's default settings: allowing HTML mixins
and rendering to HTML. The function also accepts an additional argument
`renderer`. To produce LaTeX output:
```python
import mistletoe
from mistletoe.latex_renderer import LaTeXRenderer
with open('foo.md', 'r') as fin:
rendered = mistletoe.markdown(fin, LaTeXRenderer)
```
To reflow the text in a Markdown document with a max line length of 20 characters:
```python
import mistletoe
from mistletoe.markdown_renderer import MarkdownRenderer
with open('dev-guide.md', 'r') as fin:
with MarkdownRenderer(max_line_length=20) as renderer:
print(renderer.render(mistletoe.Document(fin)))
```
Finally, here's how you would manually specify extra tokens via a renderer.
In the following example, we use `HtmlRenderer` to render
the AST. The renderer itself adds `HtmlBlock` and `HtmlSpan` tokens to the parsing
process. The result should be equal to the output obtained from
the first example above.
```python
from mistletoe import Document, HtmlRenderer
with open('foo.md', 'r') as fin:
with HtmlRenderer() as renderer: # or: `with HtmlRenderer(AnotherToken1, AnotherToken2) as renderer:`
doc = Document(fin) # parse the lines into AST
rendered = renderer.render(doc) # render the AST
# internal lists of tokens to be parsed are automatically reset when exiting this `with` block
```
**Important**: As can be seen from the example above,
the parsing phase is currently tightly coupled to the initialization
and closing of a renderer. Therefore, you should never call `Document(...)`
outside of a `with ... as renderer` block, unless you know what you are doing.
### Usage from command-line
pip installation enables mistletoe's command-line utility. Type the following
directly into your shell:
```sh
mistletoe foo.md
```
This will transpile `foo.md` into HTML, and dump the output to stdout. To save
the HTML, direct the output into a file:
```sh
mistletoe foo.md > out.html
```
You can use a different renderer by including the full path to the renderer
class after a `-r` or `--renderer` flag. For example, to transpile into
LaTeX:
```sh
mistletoe foo.md --renderer mistletoe.latex_renderer.LaTeXRenderer
```
and similarly for a renderer in the contrib package:
```sh
mistletoe foo.md --renderer mistletoe.contrib.jira_renderer.JiraRenderer
```
### mistletoe interactive mode
Running `mistletoe` without specifying a file will land you in interactive
mode. Like Python's REPL, interactive mode allows you to test how your
Markdown will be interpreted by mistletoe:
```html
mistletoe [version 0.7.2] (interactive)
Type Ctrl-D to complete input, or Ctrl-C to exit.
>>> some **bold** text
... and some *italics*
...
<p>some <strong>bold</strong> text
and some <em>italics</em></p>
>>>
```
The interactive mode also accepts the `--renderer` flag:
```latex
mistletoe [version 0.7.2] (interactive)
Type Ctrl-D to complete input, or Ctrl-C to exit.
Using renderer: LaTeXRenderer
>>> some **bold** text
... and some *italics*
...
\documentclass{article}
\begin{document}
some \textbf{bold} text
and some \textit{italics}
\end{document}
>>>
```
Who uses mistletoe?
-------------------
mistletoe is used by projects with a wide range of target audiences.
You can find some concrete examples in the "Used by" section
on [Libraries.io][libraries-mistletoe], though that is by no means a complete
list.
Also a list of [Dependents][github-dependents] is tracked by GitHub directly.
### Run mistletoe from CopyQ
One notable example is running mistletoe as a Markdown converter from the
advanced clipboard manager called [CopyQ][copyq]. One just needs to install
the [Convert Markdown to ...][copyq-convert-md] custom script command
and then run this command on any selected Markdown text.
Why mistletoe?
--------------
"For fun," says David Beazley.
Further reading
---------------
* [Performance][performance]
* [Developer's Guide](dev-guide.md)
Copyright & License
-------------------
* mistletoe's logo uses artwork by [Freepik][icon], under
[CC BY 3.0][cc-by].
* mistletoe is released under [MIT][license].
[build-badge]: https://img.shields.io/github/actions/workflow/status/miyuchina/mistletoe/python-package.yml?style=flat-square
[cover-badge]: https://img.shields.io/coveralls/miyuchina/mistletoe.svg?style=flat-square
[pypi-badge]: https://img.shields.io/pypi/v/mistletoe.svg?style=flat-square
[wheel-badge]: https://img.shields.io/pypi/wheel/mistletoe.svg?style=flat-square
[github-actions]: https://github.com/miyuchina/mistletoe/actions/workflows/python-package.yml
[coveralls]: https://coveralls.io/github/miyuchina/mistletoe?branch=master
[pypi]: https://pypi.python.org/pypi/mistletoe
[mistune]: https://github.com/lepture/mistune
[python-markdown]: https://github.com/waylan/Python-Markdown
[python-markdown2]: https://github.com/trentm/python-markdown2
[commonmark-py]: https://github.com/rtfd/CommonMark-py
[performance]: performance.md
[oilshell]: https://www.oilshell.org/blog/2018/02/14.html
[commonmark]: https://spec.commonmark.org/
[contrib]: https://github.com/miyuchina/mistletoe/tree/master/mistletoe/contrib
[scheme]: https://github.com/miyuchina/mistletoe/blob/master/mistletoe/contrib/scheme.py
[contributing]: CONTRIBUTING.md
[icon]: https://www.freepik.com
[cc-by]: https://creativecommons.org/licenses/by/3.0/us/
[license]: LICENSE
[pythonpath]: https://stackoverflow.com/questions/16107526/how-to-flexibly-change-pythonpath
[libraries-mistletoe]: https://libraries.io/pypi/mistletoe
[copyq]: https://hluk.github.io/CopyQ/
[copyq-convert-md]: https://github.com/hluk/copyq-commands/tree/master/Global#convert-markdown-to-
[github-dependents]: https://github.com/miyuchina/mistletoe/network/dependents

mirror/natsort/index.html Normal file
View File

@ -0,0 +1,20 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for natsort
</title>
</head>
<body>
<h1>
Links for natsort
</h1>
<a href="/natsort/natsort-8.4.0-py3-none-any.whl#sha256=4732914fb471f56b5cce04d7bae6f164a592c7712e1c85f9ef585e197299521c" data-requires-python="&gt;=3.7" data-dist-info-metadata="sha256=fe0934ce644ef39d034647b3280cc7e8afeaad92732ca70eb5f9e946db65f4a3">
natsort-8.4.0-py3-none-any.whl
</a>
<br />
</body>
</html>

Binary file not shown.

View File

@ -0,0 +1,548 @@
Metadata-Version: 2.1
Name: natsort
Version: 8.4.0
Summary: Simple yet flexible natural sorting in Python.
Home-page: https://github.com/SethMMorton/natsort
Author: Seth M. Morton
Author-email: drtuba78@gmail.com
License: MIT
Platform: UNKNOWN
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Science/Research
Classifier: Intended Audience :: System Administrators
Classifier: Intended Audience :: Information Technology
Classifier: Intended Audience :: Financial and Insurance Industry
Classifier: Operating System :: OS Independent
Classifier: License :: OSI Approved :: MIT License
Classifier: Natural Language :: English
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Topic :: Scientific/Engineering :: Information Analysis
Classifier: Topic :: Utilities
Classifier: Topic :: Text Processing
Requires-Python: >=3.7
Description-Content-Type: text/x-rst
License-File: LICENSE
Provides-Extra: fast
Requires-Dist: fastnumbers (>=2.0.0) ; extra == 'fast'
Provides-Extra: icu
Requires-Dist: PyICU (>=1.0.0) ; extra == 'icu'
natsort
=======
.. image:: https://img.shields.io/pypi/v/natsort.svg
:target: https://pypi.org/project/natsort/
.. image:: https://img.shields.io/pypi/pyversions/natsort.svg
:target: https://pypi.org/project/natsort/
.. image:: https://img.shields.io/pypi/l/natsort.svg
:target: https://github.com/SethMMorton/natsort/blob/main/LICENSE
.. image:: https://github.com/SethMMorton/natsort/workflows/Tests/badge.svg
:target: https://github.com/SethMMorton/natsort/actions
.. image:: https://codecov.io/gh/SethMMorton/natsort/branch/main/graph/badge.svg
:target: https://codecov.io/gh/SethMMorton/natsort
.. image:: https://img.shields.io/pypi/dw/natsort.svg
:target: https://pypi.org/project/natsort/
Simple yet flexible natural sorting in Python.
- Source Code: https://github.com/SethMMorton/natsort
- Downloads: https://pypi.org/project/natsort/
- Documentation: https://natsort.readthedocs.io/
- `Examples and Recipes`_
- `How Does Natsort Work?`_
- `API`_
- `Quick Description`_
- `Quick Examples`_
- `FAQ`_
- `Requirements`_
- `Optional Dependencies`_
- `Installation`_
- `How to Run Tests`_
- `How to Build Documentation`_
- `Dropped Deprecated APIs`_
- `History`_
**NOTE**: Please see the `Dropped Deprecated APIs`_ section for changes.
Quick Description
-----------------
When you try to sort a list of strings that contain numbers, the normal Python
sort algorithm sorts lexicographically, so you might not get the results that
you expect:
.. code-block:: pycon
>>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in']
>>> sorted(a)
['1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '2 ft 7 in', '7 ft 6 in']
Notice that it has the order ('1', '10', '2') - this is because the list is
being sorted in lexicographical order, which sorts numbers like you would
letters (i.e. 'b', 'ba', 'c').
`natsort`_ provides a function `natsorted()`_ that helps sort lists
"naturally" ("naturally" is rather ill-defined, but in general it means
sorting based on meaning and not computer code point).
Using `natsorted()`_ is simple:
.. code-block:: pycon
>>> from natsort import natsorted
>>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in']
>>> natsorted(a)
['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in']
`natsorted()`_ identifies numbers anywhere in a string and sorts them
naturally. Below are some other things you can do with `natsort`_
(also see the `Examples and Recipes`_ for a quick start guide, or the
`API`_ for complete details).
**Note**: `natsorted()`_ is designed to be a drop-in replacement for the
built-in `sorted()`_ function. Like `sorted()`_, `natsorted()`_
`does not sort in-place`. To sort a list and keep the result, you must
explicitly assign the output back to the variable:
.. code-block:: pycon
>>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in']
>>> natsorted(a)
['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in']
>>> print(a) # 'a' was not sorted; "natsorted" simply returned a sorted list
['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in']
>>> a = natsorted(a) # Now 'a' will be sorted because the sorted list was assigned to 'a'
>>> print(a)
['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in']
Please see `Generating a Reusable Sorting Key and Sorting In-Place`_ for
an alternate way to sort in-place naturally.
Quick Examples
--------------
- `Sorting Versions`_
- `Sort Paths Like My File Browser (e.g. Windows Explorer on Windows)`_
- `Sorting by Real Numbers (i.e. Signed Floats)`_
- `Locale-Aware Sorting (or "Human Sorting")`_
- `Further Customizing Natsort`_
- `Sorting Mixed Types`_
- `Handling Bytes`_
- `Generating a Reusable Sorting Key and Sorting In-Place`_
- `Other Useful Things`_
Sorting Versions
++++++++++++++++
`natsort`_ does not actually *comprehend* version numbers.
It just so happens that the most common versioning schemes are designed to
work with standard natural sorting techniques; these schemes include
``MAJOR.MINOR``, ``MAJOR.MINOR.PATCH``, and ``YEAR.MONTH.DAY``. If your data
conforms to a scheme like this, then it will work out-of-the-box with
`natsorted()`_ (as of `natsort`_ version >= 4.0.0):
.. code-block:: pycon
>>> a = ['version-1.9', 'version-2.0', 'version-1.11', 'version-1.10']
>>> natsorted(a)
['version-1.9', 'version-1.10', 'version-1.11', 'version-2.0']
If you need to sort versions that use a more complicated scheme, please see
`these version sorting examples`_.
Sort Paths Like My File Browser (e.g. Windows Explorer on Windows)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Prior to `natsort`_ version 7.1.0, it was a common request to be able to
sort paths like Windows Explorer. As of `natsort`_ 7.1.0, the function
`os_sorted()`_ has been added to provide users the ability to sort
in the order that their file browser might sort (e.g. Windows Explorer on
Windows, Finder on macOS, Dolphin/Nautilus/Thunar/etc. on Linux).
.. code-block:: python
import os
from natsort import os_sorted
print(os_sorted(os.listdir()))
# The directory sorted like your file browser might show
Output will be different depending on the operating system you are on.
For users **not** on Windows (e.g. macOS/Linux) it is **strongly** recommended
to also install `PyICU`_, which will help
`natsort`_ give results that match most file browsers. If this is not installed,
it will fall back on Python's built-in `locale`_ module and will give good
results for most input, but will give poor results for special characters.
Sorting by Real Numbers (i.e. Signed Floats)
++++++++++++++++++++++++++++++++++++++++++++
This is useful in scientific data analysis (and was the default behavior
of `natsorted()`_ for `natsort`_ version < 4.0.0). Use the `realsorted()`_
function:
.. code-block:: pycon
>>> from natsort import realsorted, ns
>>> # Note that when interpreting as signed floats, the below numbers are
>>> # +5.10, -3.00, +5.30, +2.00
>>> a = ['position5.10.data', 'position-3.data', 'position5.3.data', 'position2.data']
>>> natsorted(a)
['position2.data', 'position5.3.data', 'position5.10.data', 'position-3.data']
>>> natsorted(a, alg=ns.REAL)
['position-3.data', 'position2.data', 'position5.10.data', 'position5.3.data']
>>> realsorted(a) # shortcut for natsorted with alg=ns.REAL
['position-3.data', 'position2.data', 'position5.10.data', 'position5.3.data']
Locale-Aware Sorting (or "Human Sorting")
+++++++++++++++++++++++++++++++++++++++++
This is where the non-numeric characters are also ordered based on their
meaning, not on their ordinal value, and a locale-dependent thousands
separator and decimal separator is accounted for in the number.
This can be achieved with the `humansorted()`_ function:
.. code-block:: pycon
>>> a = ['Apple', 'apple15', 'Banana', 'apple14,689', 'banana']
>>> natsorted(a)
['Apple', 'Banana', 'apple14,689', 'apple15', 'banana']
>>> import locale
>>> locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
'en_US.UTF-8'
>>> natsorted(a, alg=ns.LOCALE)
['apple15', 'apple14,689', 'Apple', 'banana', 'Banana']
>>> from natsort import humansorted
>>> humansorted(a) # shortcut for natsorted with alg=ns.LOCALE
['apple15', 'apple14,689', 'Apple', 'banana', 'Banana']
You may find you need to explicitly set the locale to get this to work
(as shown in the example). Please see `locale issues`_ and the
`Optional Dependencies`_ section below before using the `humansorted()`_ function.
Further Customizing Natsort
+++++++++++++++++++++++++++
If you need to combine multiple algorithm modifiers (such as ``ns.REAL``,
``ns.LOCALE``, and ``ns.IGNORECASE``), you can combine the options using the
bitwise OR operator (``|``). For example,
.. code-block:: pycon
>>> a = ['Apple', 'apple15', 'Banana', 'apple14,689', 'banana']
>>> natsorted(a, alg=ns.REAL | ns.LOCALE | ns.IGNORECASE)
['Apple', 'apple15', 'apple14,689', 'Banana', 'banana']
>>> # The ns enum provides long and short forms for each option.
>>> ns.LOCALE == ns.L
True
>>> # You can also customize the convenience functions, too.
>>> natsorted(a, alg=ns.REAL | ns.LOCALE | ns.IGNORECASE) == realsorted(a, alg=ns.L | ns.IC)
True
>>> natsorted(a, alg=ns.REAL | ns.LOCALE | ns.IGNORECASE) == humansorted(a, alg=ns.R | ns.IC)
True
All of the available customizations can be found in the documentation for
`the ns enum`_.
You can also add your own custom transformation functions with the ``key``
argument. These can be used with ``alg`` if you wish.
.. code-block:: pycon
>>> a = ['apple2.50', '2.3apple']
>>> natsorted(a, key=lambda x: x.replace('apple', ''), alg=ns.REAL)
['2.3apple', 'apple2.50']
Sorting Mixed Types
+++++++++++++++++++
You can mix and match `int`_, `float`_, and `str`_ types when you sort:
.. code-block:: pycon
>>> a = ['4.5', 6, 2.0, '5', 'a']
>>> natsorted(a)
[2.0, '4.5', '5', 6, 'a']
>>> # sorted(a) would raise an "unorderable types" TypeError
Handling Bytes
++++++++++++++
`natsort`_ does not officially support the `bytes`_ type, but
convenience functions are provided that help you decode to `str`_ first:
.. code-block:: pycon
>>> from natsort import as_utf8
>>> a = [b'a', 14.0, 'b']
>>> # natsorted(a) would raise a TypeError (bytes() < str())
>>> natsorted(a, key=as_utf8) == [14.0, b'a', 'b']
True
>>> a = [b'a56', b'a5', b'a6', b'a40']
>>> # natsorted(a) would return the same results as sorted(a)
>>> natsorted(a, key=as_utf8) == [b'a5', b'a6', b'a40', b'a56']
True
Generating a Reusable Sorting Key and Sorting In-Place
++++++++++++++++++++++++++++++++++++++++++++++++++++++
Under the hood, `natsorted()`_ works by generating a custom sorting
key using `natsort_keygen()`_ and then passes that to the built-in
`sorted()`_. You can use the `natsort_keygen()`_ function yourself to
generate a custom sorting key to sort in-place using the `list.sort()`_
method.
.. code-block:: pycon
>>> from natsort import natsort_keygen
>>> natsort_key = natsort_keygen()
>>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in']
>>> natsorted(a) == sorted(a, key=natsort_key)
True
>>> a.sort(key=natsort_key)
>>> a
['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in']
All of the algorithm customizations mentioned in the
`Further Customizing Natsort`_ section can also be applied to
`natsort_keygen()`_ through the *alg* keyword option.
Other Useful Things
+++++++++++++++++++
- recursively descend into lists of lists
- automatic unicode normalization of input data
- `controlling the case-sensitivity`_
- `sorting file paths correctly`_
- `allow custom sorting keys`_
- `accounting for units`_
FAQ
---
How do I debug `natsorted()`_?
The best way to debug `natsorted()`_ is to generate a key using `natsort_keygen()`_
with the same options being passed to `natsorted()`_. One can take a look at
exactly what is being done with their input using this key - it is highly
recommended to `look at this issue describing how to debug`_ for *how* to debug,
and also to review the `How Does Natsort Work?`_ page for *why* `natsort`_ is
doing that to your data.
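For example, a minimal sketch of that approach (the options and input string here are hypothetical, and the exact tuple layout can vary between `natsort`_ versions):
.. code-block:: pycon
>>> from natsort import natsort_keygen, ns
>>> debug_key = natsort_keygen(alg=ns.REAL)  # same options you would pass to natsorted()
>>> result = debug_key('position5.10.data')  # inspect what natsort does to the input
>>> isinstance(result, tuple)
True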
If you are trying to sort custom classes and running into trouble, please
take a look at https://github.com/SethMMorton/natsort/issues/60. In short,
custom classes are not likely to be sorted correctly if one relies
on the behavior of ``__lt__`` and the other rich comparison operators in
their custom class - it is better to use a ``key`` function with
`natsort`_, or use the `natsort`_ key as part of your rich comparison
operator definition.
`natsort`_ gave me results I didn't expect, and it's a terrible library!
Did you try to debug using the above advice? If so, and you still cannot figure out
the error, then please `file an issue`_.
How *does* `natsort`_ work?
If you don't want to read `How Does Natsort Work?`_,
here is a quick primer.
`natsort`_ provides a `key function`_ that can be passed to `list.sort()`_
or `sorted()`_ in order to modify the default sorting behavior. This key
is generated on-demand with the key generator `natsort_keygen()`_.
`natsorted()`_ is essentially a wrapper for the following code:
.. code-block:: pycon
>>> from natsort import natsort_keygen
>>> natsort_key = natsort_keygen()
>>> sorted(['1', '10', '2'], key=natsort_key)
['1', '2', '10']
Users can further customize `natsort`_ sorting behavior with the ``key``
and/or ``alg`` options (see details in the `Further Customizing Natsort`_
section).
The key generated by `natsort_keygen()`_ *always* returns a `tuple`_. It
does so in the following way (*some details omitted for clarity*):
1. Assume the input is a string, and attempt to split it into numbers and
non-numbers using regular expressions. Numbers are then converted into
either `int`_ or `float`_.
2. If the above fails because the input is not a string, assume the input
is some other sequence (e.g. `list`_ or `tuple`_), and recursively
apply the key to each element of the sequence.
3. If the above fails because the input is not iterable, assume the input
is an `int`_ or `float`_, and just return the input in a `tuple`_.
Because a `tuple`_ is always returned, a `TypeError`_ should not be common
unless one tries to do something odd like sort an `int`_ against a `list`_.
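A hedged illustration of those three branches (standard `natsort`_ API only; the exact tuple contents are version-dependent, so only their common shape is checked):
.. code-block:: pycon
>>> from natsort import natsort_keygen
>>> natsort_key = natsort_keygen()
>>> k1 = natsort_key('x10')           # 1. a string is split into text and numbers
>>> k2 = natsort_key(['x10', 'x2'])   # 2. a sequence is keyed element by element
>>> k3 = natsort_key(7)               # 3. a bare number is wrapped in a tuple
>>> all(isinstance(k, tuple) for k in (k1, k2, k3))
True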
Shell script
------------
`natsort`_ comes with a shell script called `natsort`_; it can also be called
from the command line with ``python -m natsort``. Check out the
`shell script wiki documentation`_ for more details.
Requirements
------------
`natsort`_ requires Python 3.7 or greater.
Optional Dependencies
---------------------
fastnumbers
+++++++++++
The most efficient sorting can occur if you install the
`fastnumbers`_ package
(version >=2.0.0); it helps with the string to number conversions.
`natsort`_ will still run (efficiently) without the package, but if you need
to squeeze out that extra juice it is recommended you include this as a
dependency. `natsort`_ will not require (or check) that
`fastnumbers`_ is installed at installation.
PyICU
+++++
It is recommended that you install `PyICU`_ if you wish to sort in a
locale-dependent manner, see this page on `locale issues`_ for an explanation why.
Installation
------------
Use ``pip``!
.. code-block:: console
$ pip install natsort
If you want to install the `Optional Dependencies`_, you can use the
`"extras" notation`_ at installation time to install those dependencies as
well - use ``fast`` for `fastnumbers`_ and ``icu`` for `PyICU`_.
.. code-block:: console
# Install both optional dependencies.
$ pip install natsort[fast,icu]
# Install just fastnumbers
$ pip install natsort[fast]
How to Run Tests
----------------
Please note that `natsort`_ is NOT set up to support ``python setup.py test``.
The recommended way to run tests is with `tox`_. After installing ``tox``,
running tests is as simple as executing the following in the `natsort`_ directory:
.. code-block:: console
$ tox
``tox`` will create a virtual environment for your tests and install
all the needed testing requirements for you. You can specify a particular
Python version with the ``-e`` flag, e.g. ``tox -e py36``. Static analysis
is done with ``tox -e flake8``. You can see all available testing environments
with ``tox --listenvs``.
How to Build Documentation
--------------------------
If you want to build the documentation for `natsort`_, it is recommended to
use ``tox``:
.. code-block:: console
$ tox -e docs
This will place the documentation in ``build/sphinx/html``.
Dropped Deprecated APIs
-----------------------
In `natsort`_ version 6.0.0, the following APIs and functions were removed
- ``number_type`` keyword argument (deprecated since 3.4.0)
- ``signed`` keyword argument (deprecated since 3.4.0)
- ``exp`` keyword argument (deprecated since 3.4.0)
- ``as_path`` keyword argument (deprecated since 3.4.0)
- ``py3_safe`` keyword argument (deprecated since 3.4.0)
- ``ns.TYPESAFE`` (deprecated since version 5.0.0)
- ``ns.DIGIT`` (deprecated since version 5.0.0)
- ``ns.VERSION`` (deprecated since version 5.0.0)
- ``versorted()`` (discouraged since version 4.0.0,
officially deprecated since version 5.5.0)
- ``index_versorted()`` (discouraged since version 4.0.0,
officially deprecated since version 5.5.0)
In general, if you want to determine if you are using deprecated APIs, you
can run your code with the following flag:
.. code-block:: console
$ python -Wdefault::DeprecationWarning my-code.py
By default `DeprecationWarnings`_ are not shown, but this will cause them
to be shown. Alternatively, you can just set the environment variable
``PYTHONWARNINGS`` to "default::DeprecationWarning" and then run your code.
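The same filter can also be enabled from within Python itself; a minimal standard-library sketch (not a `natsort`_ API):
.. code-block:: pycon
>>> import warnings
>>> warnings.simplefilter("default", DeprecationWarning)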
Author
------
Seth M. Morton
History
-------
Please visit the changelog `on GitHub`_.
.. _natsort: https://natsort.readthedocs.io/en/stable/index.html
.. _natsorted(): https://natsort.readthedocs.io/en/stable/api.html#natsort.natsorted
.. _natsort_keygen(): https://natsort.readthedocs.io/en/stable/api.html#natsort.natsort_keygen
.. _realsorted(): https://natsort.readthedocs.io/en/stable/api.html#natsort.realsorted
.. _humansorted(): https://natsort.readthedocs.io/en/stable/api.html#natsort.humansorted
.. _os_sorted(): https://natsort.readthedocs.io/en/stable/api.html#natsort.os_sorted
.. _the ns enum: https://natsort.readthedocs.io/en/stable/api.html#natsort.ns
.. _fastnumbers: https://github.com/SethMMorton/fastnumbers
.. _sorted(): https://docs.python.org/3/library/functions.html#sorted
.. _list.sort(): https://docs.python.org/3/library/stdtypes.html#list.sort
.. _key function: https://docs.python.org/3/howto/sorting.html#key-functions
.. _locale: https://docs.python.org/3/library/locale.html
.. _int: https://docs.python.org/3/library/functions.html#int
.. _float: https://docs.python.org/3/library/functions.html#float
.. _str: https://docs.python.org/3/library/stdtypes.html#str
.. _bytes: https://docs.python.org/3/library/stdtypes.html#bytes
.. _list: https://docs.python.org/3/library/stdtypes.html#list
.. _tuple: https://docs.python.org/3/library/stdtypes.html#tuple
.. _TypeError: https://docs.python.org/3/library/exceptions.html#TypeError
.. _DeprecationWarnings: https://docs.python.org/3/library/exceptions.html#DeprecationWarning
.. _"extras" notation: https://packaging.python.org/tutorials/installing-packages/#installing-setuptools-extras
.. _PyICU: https://pypi.org/project/PyICU
.. _tox: https://tox.readthedocs.io/en/latest/
.. _Examples and Recipes: https://github.com/SethMMorton/natsort/wiki/Examples-and-Recipes
.. _How Does Natsort Work?: https://github.com/SethMMorton/natsort/wiki/How-Does-Natsort-Work%3F
.. _API: https://natsort.readthedocs.io/en/stable/api.html
.. _on GitHub: https://github.com/SethMMorton/natsort/blob/main/CHANGELOG.md
.. _file an issue: https://github.com/SethMMorton/natsort/issues/new
.. _look at this issue describing how to debug: https://github.com/SethMMorton/natsort/issues/13#issuecomment-50422375
.. _controlling the case-sensitivity: https://github.com/SethMMorton/natsort/wiki/Examples-and-Recipes#controlling-case-when-sorting
.. _sorting file paths correctly: https://github.com/SethMMorton/natsort/wiki/Examples-and-Recipes#sort-os-generated-paths
.. _allow custom sorting keys: https://github.com/SethMMorton/natsort/wiki/Examples-and-Recipes#using-a-custom-sorting-key
.. _accounting for units: https://github.com/SethMMorton/natsort/wiki/Examples-and-Recipes#accounting-for-units-when-sorting
.. _these version sorting examples: https://github.com/SethMMorton/natsort/wiki/Examples-and-Recipes#sorting-more-expressive-versioning-schemes
.. _locale issues: https://github.com/SethMMorton/natsort/wiki/Possible-Issues-with-natsort.humansorted-or-ns.LOCALE
.. _shell script wiki documentation: https://github.com/SethMMorton/natsort/wiki/Shell-Script

View File

@ -0,0 +1,20 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for platformdirs
</title>
</head>
<body>
<h1>
Links for platformdirs
</h1>
<a href="/platformdirs/platformdirs-4.5.0-py3-none-any.whl#sha256=e578a81bb873cbb89a41fcc904c7ef523cc18284b7e3b3ccf06aca1403b7ebd3" data-requires-python="&gt;=3.10" data-dist-info-metadata="sha256=985c5997a43e7ceda709d5960894f858eaf8a7b535da3657e25936e8ca86cb5a">
platformdirs-4.5.0-py3-none-any.whl
</a>
<br />
</body>
</html>

View File

@ -0,0 +1,350 @@
Metadata-Version: 2.4
Name: platformdirs
Version: 4.5.0
Summary: A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`.
Project-URL: Changelog, https://github.com/tox-dev/platformdirs/releases
Project-URL: Documentation, https://platformdirs.readthedocs.io
Project-URL: Homepage, https://github.com/tox-dev/platformdirs
Project-URL: Source, https://github.com/tox-dev/platformdirs
Project-URL: Tracker, https://github.com/tox-dev/platformdirs/issues
Maintainer-email: Bernát Gábor <gaborjbernat@gmail.com>, Julian Berman <Julian@GrayVines.com>, Ofek Lev <oss@ofek.dev>, Ronny Pfannschmidt <opensource@ronnypfannschmidt.de>
License-Expression: MIT
License-File: LICENSE
Keywords: appdirs,application,cache,directory,log,user
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Requires-Python: >=3.10
Provides-Extra: docs
Requires-Dist: furo>=2025.9.25; extra == 'docs'
Requires-Dist: proselint>=0.14; extra == 'docs'
Requires-Dist: sphinx-autodoc-typehints>=3.2; extra == 'docs'
Requires-Dist: sphinx>=8.2.3; extra == 'docs'
Provides-Extra: test
Requires-Dist: appdirs==1.4.4; extra == 'test'
Requires-Dist: covdefaults>=2.3; extra == 'test'
Requires-Dist: pytest-cov>=7; extra == 'test'
Requires-Dist: pytest-mock>=3.15.1; extra == 'test'
Requires-Dist: pytest>=8.4.2; extra == 'test'
Provides-Extra: type
Requires-Dist: mypy>=1.18.2; extra == 'type'
Description-Content-Type: text/x-rst
The problem
===========
.. image:: https://badge.fury.io/py/platformdirs.svg
:target: https://badge.fury.io/py/platformdirs
.. image:: https://img.shields.io/pypi/pyversions/platformdirs.svg
:target: https://pypi.python.org/pypi/platformdirs/
.. image:: https://github.com/tox-dev/platformdirs/actions/workflows/check.yaml/badge.svg
:target: https://github.com/platformdirs/platformdirs/actions
.. image:: https://static.pepy.tech/badge/platformdirs/month
:target: https://pepy.tech/project/platformdirs
When writing a desktop application, finding the right location to store user data
and configuration varies per platform. Even for single-platform apps, there
may be plenty of nuances in figuring out the right location.
For example, if running on macOS, you should use::
~/Library/Application Support/<AppName>
If on Windows (at least English Win) that should be::
C:\Users\<User>\Application Data\Local Settings\<AppAuthor>\<AppName>
or possibly::
C:\Users\<User>\Application Data\<AppAuthor>\<AppName>
for `roaming profiles <https://docs.microsoft.com/en-us/previous-versions/windows/it-pro/windows-vista/cc766489(v=ws.10)>`_ but that is another story.
On Linux (and other Unices), according to the `XDG Basedir Spec`_, it should be::
~/.local/share/<AppName>
.. _XDG Basedir Spec: https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html
``platformdirs`` to the rescue
==============================
This kind of thing is what the ``platformdirs`` package is for.
``platformdirs`` will help you choose an appropriate:
- user data dir (``user_data_dir``)
- user config dir (``user_config_dir``)
- user cache dir (``user_cache_dir``)
- site data dir (``site_data_dir``)
- site config dir (``site_config_dir``)
- user log dir (``user_log_dir``)
- user documents dir (``user_documents_dir``)
- user downloads dir (``user_downloads_dir``)
- user pictures dir (``user_pictures_dir``)
- user videos dir (``user_videos_dir``)
- user music dir (``user_music_dir``)
- user desktop dir (``user_desktop_dir``)
- user runtime dir (``user_runtime_dir``)
And also:
- Is slightly opinionated on the directory names used. Look for "OPINION" in
documentation and code for when an opinion is being applied.
Example output
==============
On macOS:
.. code-block:: pycon
>>> from platformdirs import *
>>> appname = "SuperApp"
>>> appauthor = "Acme"
>>> user_data_dir(appname, appauthor)
'/Users/trentm/Library/Application Support/SuperApp'
>>> user_config_dir(appname, appauthor)
'/Users/trentm/Library/Application Support/SuperApp'
>>> user_cache_dir(appname, appauthor)
'/Users/trentm/Library/Caches/SuperApp'
>>> site_data_dir(appname, appauthor)
'/Library/Application Support/SuperApp'
>>> site_config_dir(appname, appauthor)
'/Library/Application Support/SuperApp'
>>> user_log_dir(appname, appauthor)
'/Users/trentm/Library/Logs/SuperApp'
>>> user_documents_dir()
'/Users/trentm/Documents'
>>> user_downloads_dir()
'/Users/trentm/Downloads'
>>> user_pictures_dir()
'/Users/trentm/Pictures'
>>> user_videos_dir()
'/Users/trentm/Movies'
>>> user_music_dir()
'/Users/trentm/Music'
>>> user_desktop_dir()
'/Users/trentm/Desktop'
>>> user_runtime_dir(appname, appauthor)
'/Users/trentm/Library/Caches/TemporaryItems/SuperApp'
On Windows:
.. code-block:: pycon
>>> from platformdirs import *
>>> appname = "SuperApp"
>>> appauthor = "Acme"
>>> user_data_dir(appname, appauthor)
'C:\\Users\\trentm\\AppData\\Local\\Acme\\SuperApp'
>>> user_data_dir(appname, appauthor, roaming=True)
'C:\\Users\\trentm\\AppData\\Roaming\\Acme\\SuperApp'
>>> user_config_dir(appname, appauthor)
'C:\\Users\\trentm\\AppData\\Local\\Acme\\SuperApp'
>>> user_cache_dir(appname, appauthor)
'C:\\Users\\trentm\\AppData\\Local\\Acme\\SuperApp\\Cache'
>>> site_data_dir(appname, appauthor)
'C:\\ProgramData\\Acme\\SuperApp'
>>> site_config_dir(appname, appauthor)
'C:\\ProgramData\\Acme\\SuperApp'
>>> user_log_dir(appname, appauthor)
'C:\\Users\\trentm\\AppData\\Local\\Acme\\SuperApp\\Logs'
>>> user_documents_dir()
'C:\\Users\\trentm\\Documents'
>>> user_downloads_dir()
'C:\\Users\\trentm\\Downloads'
>>> user_pictures_dir()
'C:\\Users\\trentm\\Pictures'
>>> user_videos_dir()
'C:\\Users\\trentm\\Videos'
>>> user_music_dir()
'C:\\Users\\trentm\\Music'
>>> user_desktop_dir()
'C:\\Users\\trentm\\Desktop'
>>> user_runtime_dir(appname, appauthor)
'C:\\Users\\trentm\\AppData\\Local\\Temp\\Acme\\SuperApp'
On Linux:
.. code-block:: pycon
>>> from platformdirs import *
>>> appname = "SuperApp"
>>> appauthor = "Acme"
>>> user_data_dir(appname, appauthor)
'/home/trentm/.local/share/SuperApp'
>>> user_config_dir(appname)
'/home/trentm/.config/SuperApp'
>>> user_cache_dir(appname, appauthor)
'/home/trentm/.cache/SuperApp'
>>> site_data_dir(appname, appauthor)
'/usr/local/share/SuperApp'
>>> site_data_dir(appname, appauthor, multipath=True)
'/usr/local/share/SuperApp:/usr/share/SuperApp'
>>> site_config_dir(appname)
'/etc/xdg/SuperApp'
>>> import os
>>> os.environ["XDG_CONFIG_DIRS"] = "/etc:/usr/local/etc"
>>> site_config_dir(appname, multipath=True)
'/etc/SuperApp:/usr/local/etc/SuperApp'
>>> user_log_dir(appname, appauthor)
'/home/trentm/.local/state/SuperApp/log'
>>> user_documents_dir()
'/home/trentm/Documents'
>>> user_downloads_dir()
'/home/trentm/Downloads'
>>> user_pictures_dir()
'/home/trentm/Pictures'
>>> user_videos_dir()
'/home/trentm/Videos'
>>> user_music_dir()
'/home/trentm/Music'
>>> user_desktop_dir()
'/home/trentm/Desktop'
>>> user_runtime_dir(appname, appauthor)
'/run/user/{os.getuid()}/SuperApp'
On Android::
>>> from platformdirs import *
>>> appname = "SuperApp"
>>> appauthor = "Acme"
>>> user_data_dir(appname, appauthor)
'/data/data/com.myApp/files/SuperApp'
>>> user_config_dir(appname)
'/data/data/com.myApp/shared_prefs/SuperApp'
>>> user_cache_dir(appname, appauthor)
'/data/data/com.myApp/cache/SuperApp'
>>> site_data_dir(appname, appauthor)
'/data/data/com.myApp/files/SuperApp'
>>> site_config_dir(appname)
'/data/data/com.myApp/shared_prefs/SuperApp'
>>> user_log_dir(appname, appauthor)
'/data/data/com.myApp/cache/SuperApp/log'
>>> user_documents_dir()
'/storage/emulated/0/Documents'
>>> user_downloads_dir()
'/storage/emulated/0/Downloads'
>>> user_pictures_dir()
'/storage/emulated/0/Pictures'
>>> user_videos_dir()
'/storage/emulated/0/DCIM/Camera'
>>> user_music_dir()
'/storage/emulated/0/Music'
>>> user_desktop_dir()
'/storage/emulated/0/Desktop'
>>> user_runtime_dir(appname, appauthor)
'/data/data/com.myApp/cache/SuperApp/tmp'
Note: Some Android apps, like Termux and Pydroid, are used as shells. These
apps are used by the end user to emulate a Linux environment. The presence of
the ``SHELL`` environment variable is used by Platformdirs to differentiate
between general Android apps and Android apps used as shells. Shell Android
apps also support the ``XDG_*`` environment variables.
``PlatformDirs`` for convenience
================================
.. code-block:: pycon
>>> from platformdirs import PlatformDirs
>>> dirs = PlatformDirs("SuperApp", "Acme")
>>> dirs.user_data_dir
'/Users/trentm/Library/Application Support/SuperApp'
>>> dirs.user_config_dir
'/Users/trentm/Library/Application Support/SuperApp'
>>> dirs.user_cache_dir
'/Users/trentm/Library/Caches/SuperApp'
>>> dirs.site_data_dir
'/Library/Application Support/SuperApp'
>>> dirs.site_config_dir
'/Library/Application Support/SuperApp'
>>> dirs.user_cache_dir
'/Users/trentm/Library/Caches/SuperApp'
>>> dirs.user_log_dir
'/Users/trentm/Library/Logs/SuperApp'
>>> dirs.user_documents_dir
'/Users/trentm/Documents'
>>> dirs.user_downloads_dir
'/Users/trentm/Downloads'
>>> dirs.user_pictures_dir
'/Users/trentm/Pictures'
>>> dirs.user_videos_dir
'/Users/trentm/Movies'
>>> dirs.user_music_dir
'/Users/trentm/Music'
>>> dirs.user_desktop_dir
'/Users/trentm/Desktop'
>>> dirs.user_runtime_dir
'/Users/trentm/Library/Caches/TemporaryItems/SuperApp'
Per-version isolation
=====================
If you have multiple versions of your app in use that you want to be
able to run side-by-side, then you may want version-isolation for these
dirs::
>>> from platformdirs import PlatformDirs
>>> dirs = PlatformDirs("SuperApp", "Acme", version="1.0")
>>> dirs.user_data_dir
'/Users/trentm/Library/Application Support/SuperApp/1.0'
>>> dirs.user_config_dir
'/Users/trentm/Library/Application Support/SuperApp/1.0'
>>> dirs.user_cache_dir
'/Users/trentm/Library/Caches/SuperApp/1.0'
>>> dirs.site_data_dir
'/Library/Application Support/SuperApp/1.0'
>>> dirs.site_config_dir
'/Library/Application Support/SuperApp/1.0'
>>> dirs.user_log_dir
'/Users/trentm/Library/Logs/SuperApp/1.0'
>>> dirs.user_documents_dir
'/Users/trentm/Documents'
>>> dirs.user_downloads_dir
'/Users/trentm/Downloads'
>>> dirs.user_pictures_dir
'/Users/trentm/Pictures'
>>> dirs.user_videos_dir
'/Users/trentm/Movies'
>>> dirs.user_music_dir
'/Users/trentm/Music'
>>> dirs.user_desktop_dir
'/Users/trentm/Desktop'
>>> dirs.user_runtime_dir
'/Users/trentm/Library/Caches/TemporaryItems/SuperApp/1.0'
Be wary of using this for configuration files though; you'll need to handle
migrating configuration files manually.
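A minimal sketch of such a manual migration check (the version strings and the copy step are hypothetical):
.. code-block:: pycon
>>> from pathlib import Path
>>> from platformdirs import PlatformDirs
>>> old_cfg = Path(PlatformDirs("SuperApp", "Acme", version="1.0").user_config_dir)
>>> new_cfg = Path(PlatformDirs("SuperApp", "Acme", version="2.0").user_config_dir)
>>> if old_cfg.exists() and not new_cfg.exists():
...     ...  # copy or convert the old settings into new_cfg here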
Why this Fork?
==============
This repository is a friendly fork of the wonderful work started by
`ActiveState <https://github.com/ActiveState/appdirs>`_ who created
``appdirs``, this package's ancestor.
Maintaining an open source project is no easy task, particularly
from within an organization, and the Python community is indebted
to ``appdirs`` (and to Trent Mick and Jeff Rouse in particular) for
creating an incredibly useful simple module, as evidenced by the wide
number of users it has attracted over the years.
Nonetheless, given the number of long-standing open issues
and pull requests, and no clear path towards `ensuring
that maintenance of the package would continue or grow
<https://github.com/ActiveState/appdirs/issues/79>`_, this fork was
created.
Contributions are most welcome.

View File

@ -0,0 +1,20 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for pyproject-parser
</title>
</head>
<body>
<h1>
Links for pyproject-parser
</h1>
<a href="/pyproject-parser/pyproject_parser-0.13.0-py3-none-any.whl#sha256=e967a66e84ade12497016af86f25a60d0fdb8c8e6e4bae0dc9d3707aeaf03bb5" data-requires-python="&gt;=3.6.1" data-dist-info-metadata="sha256=367990763f992d283c2e5399d1342a04362cd8ac807c7739abacad041cc4c440">
pyproject_parser-0.13.0-py3-none-any.whl
</a>
<br />
</body>
</html>

View File

@ -0,0 +1,219 @@
Metadata-Version: 2.4
Name: pyproject-parser
Version: 0.13.0
Summary: Parser for 'pyproject.toml'
Project-URL: Homepage, https://github.com/repo-helper/pyproject-parser
Project-URL: Issue Tracker, https://github.com/repo-helper/pyproject-parser/issues
Project-URL: Source Code, https://github.com/repo-helper/pyproject-parser
Project-URL: Documentation, https://pyproject-parser.readthedocs.io/en/latest
Author-email: Dominic Davis-Foster <dominic@davis-foster.co.uk>
License: Copyright (c) 2021 Dominic Davis-Foster
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
OR OTHER DEALINGS IN THE SOFTWARE.
License-File: LICENSE
Keywords: metadata,packaging,pep518,pep621,pyproject,toml
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: System :: Archiving :: Packaging
Classifier: Typing :: Typed
Requires-Python: >=3.6.1
Requires-Dist: apeye-core>=1.0.0
Requires-Dist: attrs>=20.3.0
Requires-Dist: dom-toml>=2.0.0
Requires-Dist: domdf-python-tools>=2.8.0
Requires-Dist: natsort>=7.1.1
Requires-Dist: packaging>=20.9
Requires-Dist: shippinglabel>=1.0.0
Requires-Dist: typing-extensions!=4.7.0,>=3.7.4.3
Provides-Extra: all
Requires-Dist: click>=7.1.2; extra == 'all'
Requires-Dist: consolekit>=1.4.1; extra == 'all'
Requires-Dist: docutils>=0.16; extra == 'all'
Requires-Dist: readme-renderer[md]>=27.0; extra == 'all'
Requires-Dist: sdjson>=0.3.1; extra == 'all'
Provides-Extra: cli
Requires-Dist: click>=7.1.2; extra == 'cli'
Requires-Dist: consolekit>=1.4.1; extra == 'cli'
Requires-Dist: sdjson>=0.3.1; extra == 'cli'
Provides-Extra: readme
Requires-Dist: docutils>=0.16; extra == 'readme'
Requires-Dist: readme-renderer[md]>=27.0; extra == 'readme'
Description-Content-Type: text/x-rst
#################
pyproject-parser
#################
.. start short_desc
**Parser for 'pyproject.toml'**
.. end short_desc
.. start shields
.. list-table::
:stub-columns: 1
:widths: 10 90
* - Docs
- |docs| |docs_check|
* - Tests
- |actions_linux| |actions_windows| |actions_macos| |coveralls|
* - PyPI
- |pypi-version| |supported-versions| |supported-implementations| |wheel|
* - Anaconda
- |conda-version| |conda-platform|
* - Activity
- |commits-latest| |commits-since| |maintained| |pypi-downloads|
* - QA
- |codefactor| |actions_flake8| |actions_mypy|
* - Other
- |license| |language| |requires|
.. |docs| image:: https://img.shields.io/readthedocs/pyproject-parser/latest?logo=read-the-docs
:target: https://pyproject-parser.readthedocs.io/en/latest
:alt: Documentation Build Status
.. |docs_check| image:: https://github.com/repo-helper/pyproject-parser/workflows/Docs%20Check/badge.svg
:target: https://github.com/repo-helper/pyproject-parser/actions?query=workflow%3A%22Docs+Check%22
:alt: Docs Check Status
.. |actions_linux| image:: https://github.com/repo-helper/pyproject-parser/workflows/Linux/badge.svg
:target: https://github.com/repo-helper/pyproject-parser/actions?query=workflow%3A%22Linux%22
:alt: Linux Test Status
.. |actions_windows| image:: https://github.com/repo-helper/pyproject-parser/workflows/Windows/badge.svg
:target: https://github.com/repo-helper/pyproject-parser/actions?query=workflow%3A%22Windows%22
:alt: Windows Test Status
.. |actions_macos| image:: https://github.com/repo-helper/pyproject-parser/workflows/macOS/badge.svg
:target: https://github.com/repo-helper/pyproject-parser/actions?query=workflow%3A%22macOS%22
:alt: macOS Test Status
.. |actions_flake8| image:: https://github.com/repo-helper/pyproject-parser/workflows/Flake8/badge.svg
:target: https://github.com/repo-helper/pyproject-parser/actions?query=workflow%3A%22Flake8%22
:alt: Flake8 Status
.. |actions_mypy| image:: https://github.com/repo-helper/pyproject-parser/workflows/mypy/badge.svg
:target: https://github.com/repo-helper/pyproject-parser/actions?query=workflow%3A%22mypy%22
:alt: mypy status
.. |requires| image:: https://dependency-dash.repo-helper.uk/github/repo-helper/pyproject-parser/badge.svg
:target: https://dependency-dash.repo-helper.uk/github/repo-helper/pyproject-parser/
:alt: Requirements Status
.. |coveralls| image:: https://img.shields.io/coveralls/github/repo-helper/pyproject-parser/master?logo=coveralls
:target: https://coveralls.io/github/repo-helper/pyproject-parser?branch=master
:alt: Coverage
.. |codefactor| image:: https://img.shields.io/codefactor/grade/github/repo-helper/pyproject-parser?logo=codefactor
:target: https://www.codefactor.io/repository/github/repo-helper/pyproject-parser
:alt: CodeFactor Grade
.. |pypi-version| image:: https://img.shields.io/pypi/v/pyproject-parser
:target: https://pypi.org/project/pyproject-parser/
:alt: PyPI - Package Version
.. |supported-versions| image:: https://img.shields.io/pypi/pyversions/pyproject-parser?logo=python&logoColor=white
:target: https://pypi.org/project/pyproject-parser/
:alt: PyPI - Supported Python Versions
.. |supported-implementations| image:: https://img.shields.io/pypi/implementation/pyproject-parser
:target: https://pypi.org/project/pyproject-parser/
:alt: PyPI - Supported Implementations
.. |wheel| image:: https://img.shields.io/pypi/wheel/pyproject-parser
:target: https://pypi.org/project/pyproject-parser/
:alt: PyPI - Wheel
.. |conda-version| image:: https://img.shields.io/conda/v/conda-forge/pyproject-parser?logo=anaconda
:target: https://anaconda.org/conda-forge/pyproject-parser
:alt: Conda - Package Version
.. |conda-platform| image:: https://img.shields.io/conda/pn/conda-forge/pyproject-parser?label=conda%7Cplatform
:target: https://anaconda.org/conda-forge/pyproject-parser
:alt: Conda - Platform
.. |license| image:: https://img.shields.io/github/license/repo-helper/pyproject-parser
:target: https://github.com/repo-helper/pyproject-parser/blob/master/LICENSE
:alt: License
.. |language| image:: https://img.shields.io/github/languages/top/repo-helper/pyproject-parser
:alt: GitHub top language
.. |commits-since| image:: https://img.shields.io/github/commits-since/repo-helper/pyproject-parser/v0.13.0
:target: https://github.com/repo-helper/pyproject-parser/pulse
:alt: GitHub commits since tagged version
.. |commits-latest| image:: https://img.shields.io/github/last-commit/repo-helper/pyproject-parser
:target: https://github.com/repo-helper/pyproject-parser/commit/master
:alt: GitHub last commit
.. |maintained| image:: https://img.shields.io/maintenance/yes/2025
:alt: Maintenance
.. |pypi-downloads| image:: https://img.shields.io/pypi/dm/pyproject-parser
:target: https://pypi.org/project/pyproject-parser/
:alt: PyPI - Downloads
.. end shields
Installation
--------------
.. start installation
``pyproject-parser`` can be installed from PyPI or Anaconda.
To install with ``pip``:
.. code-block:: bash
$ python -m pip install pyproject-parser
To install with ``conda``:
.. code-block:: bash
$ conda install -c conda-forge pyproject-parser
.. end installation
``pyproject-parser`` also has an optional README validation feature, which checks that the README will render correctly on PyPI.
This requires that the ``readme`` extra is installed:
.. code-block:: bash
$ python -m pip install pyproject-parser[readme]
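As a hedged sketch of basic use beyond installation (``PyProject.load()`` is the package's documented entry point; treat the attribute access below as illustrative):
.. code-block:: python
from pyproject_parser import PyProject

# Parse an existing pyproject.toml and inspect the standard tables
config = PyProject.load("pyproject.toml")
print(config.project["name"])           # the PEP 621 [project] table
print(config.build_system["requires"])  # the PEP 518 [build-system] table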

View File

@ -0,0 +1,20 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for requests
</title>
</head>
<body>
<h1>
Links for requests
</h1>
<a href="/requests/requests-2.32.5-py3-none-any.whl#sha256=2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6" data-requires-python="&gt;=3.9" data-dist-info-metadata="sha256=65b5a08da81f4915513e760965ff14b7518f65b7bb3efe0dab364bbcc4d40cb0">
requests-2.32.5-py3-none-any.whl
</a>
<br />
</body>
</html>

Binary file not shown.

View File

@ -0,0 +1,133 @@
Metadata-Version: 2.4
Name: requests
Version: 2.32.5
Summary: Python HTTP for Humans.
Home-page: https://requests.readthedocs.io
Author: Kenneth Reitz
Author-email: me@kennethreitz.org
License: Apache-2.0
Project-URL: Documentation, https://requests.readthedocs.io
Project-URL: Source, https://github.com/psf/requests
Classifier: Development Status :: 5 - Production/Stable
Classifier: Environment :: Web Environment
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: Apache Software License
Classifier: Natural Language :: English
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Internet :: WWW/HTTP
Classifier: Topic :: Software Development :: Libraries
Requires-Python: >=3.9
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: charset_normalizer<4,>=2
Requires-Dist: idna<4,>=2.5
Requires-Dist: urllib3<3,>=1.21.1
Requires-Dist: certifi>=2017.4.17
Provides-Extra: security
Provides-Extra: socks
Requires-Dist: PySocks!=1.5.7,>=1.5.6; extra == "socks"
Provides-Extra: use-chardet-on-py3
Requires-Dist: chardet<6,>=3.0.2; extra == "use-chardet-on-py3"
Dynamic: author
Dynamic: author-email
Dynamic: classifier
Dynamic: description
Dynamic: description-content-type
Dynamic: home-page
Dynamic: license
Dynamic: license-file
Dynamic: project-url
Dynamic: provides-extra
Dynamic: requires-dist
Dynamic: requires-python
Dynamic: summary
# Requests
**Requests** is a simple, yet elegant, HTTP library.
```python
>>> import requests
>>> r = requests.get('https://httpbin.org/basic-auth/user/pass', auth=('user', 'pass'))
>>> r.status_code
200
>>> r.headers['content-type']
'application/json; charset=utf8'
>>> r.encoding
'utf-8'
>>> r.text
'{"authenticated": true, ...'
>>> r.json()
{'authenticated': True, ...}
```
Requests allows you to send HTTP/1.1 requests extremely easily. There's no need to manually add query strings to your URLs, or to form-encode your `PUT` & `POST` data — but nowadays, just use the `json` method!
Requests is one of the most downloaded Python packages today, pulling in around `30M downloads / week` — according to GitHub, Requests is currently [depended upon](https://github.com/psf/requests/network/dependents?package_id=UGFja2FnZS01NzA4OTExNg%3D%3D) by `1,000,000+` repositories. You may certainly put your trust in this code.
[![Downloads](https://static.pepy.tech/badge/requests/month)](https://pepy.tech/project/requests)
[![Supported Versions](https://img.shields.io/pypi/pyversions/requests.svg)](https://pypi.org/project/requests)
[![Contributors](https://img.shields.io/github/contributors/psf/requests.svg)](https://github.com/psf/requests/graphs/contributors)
## Installing Requests and Supported Versions
Requests is available on PyPI:
```console
$ python -m pip install requests
```
Requests officially supports Python 3.9+.
## Supported Features & Best-Practices
Requests is ready for the demands of building robust and reliable HTTP-speaking applications, for the needs of today.
- Keep-Alive & Connection Pooling
- International Domains and URLs
- Sessions with Cookie Persistence
- Browser-style TLS/SSL Verification
- Basic & Digest Authentication
- Familiar `dict`-like Cookies
- Automatic Content Decompression and Decoding
- Multi-part File Uploads
- SOCKS Proxy Support
- Connection Timeouts
- Streaming Downloads
- Automatic honoring of `.netrc`
- Chunked HTTP Requests
## API Reference and User Guide available on [Read the Docs](https://requests.readthedocs.io)
[![Read the Docs](https://raw.githubusercontent.com/psf/requests/main/ext/ss.png)](https://requests.readthedocs.io)
## Cloning the repository
When cloning the Requests repository, you may need to add the `-c
fetch.fsck.badTimezone=ignore` flag to avoid an error about a bad commit timestamp (see
[this issue](https://github.com/psf/requests/issues/2690) for more background):
```shell
git clone -c fetch.fsck.badTimezone=ignore https://github.com/psf/requests.git
```
You can also apply this setting to your global Git config:
```shell
git config --global fetch.fsck.badTimezone ignore
```
---
[![Kenneth Reitz](https://raw.githubusercontent.com/psf/requests/main/ext/kr.png)](https://kennethreitz.org) [![Python Software Foundation](https://raw.githubusercontent.com/psf/requests/main/ext/psf.png)](https://www.python.org/psf)

View File

@ -16,6 +16,10 @@
setuptools_scm-9.2.2-py3-none-any.whl setuptools_scm-9.2.2-py3-none-any.whl
</a> </a>
<br /> <br />
<a href="/setuptools-scm/setuptools_scm-8.3.1-py3-none-any.whl#sha256=332ca0d43791b818b841213e76b1971b7711a960761c5bea5fc5cdb5196fbce3" data-requires-python="&gt;=3.8" data-dist-info-metadata="sha256=a4a5a66c7dd5f9113d3ae03392ca0918174fdeac4f1b261e63d259f9756df741">
setuptools_scm-8.3.1-py3-none-any.whl
</a>
<br />
<a href="/setuptools-scm/setuptools_scm-7.1.0-py3-none-any.whl#sha256=73988b6d848709e2af142aa48c986ea29592bbcfca5375678064708205253d8e" data-requires-python="&gt;=3.7" data-dist-info-metadata="sha256=b3f2e97c7a459abfbe07dde0547e24e5aac33d4afcfd086cda59fdf855c48d8d"> <a href="/setuptools-scm/setuptools_scm-7.1.0-py3-none-any.whl#sha256=73988b6d848709e2af142aa48c986ea29592bbcfca5375678064708205253d8e" data-requires-python="&gt;=3.7" data-dist-info-metadata="sha256=b3f2e97c7a459abfbe07dde0547e24e5aac33d4afcfd086cda59fdf855c48d8d">
setuptools_scm-7.1.0-py3-none-any.whl setuptools_scm-7.1.0-py3-none-any.whl
</a> </a>

View File

@ -0,0 +1,173 @@
Metadata-Version: 2.4
Name: setuptools-scm
Version: 8.3.1
Summary: the blessed package to manage your versions by scm tags
Author-email: Ronny Pfannschmidt <opensource@ronnypfannschmidt.de>
License: Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
Project-URL: documentation, https://setuptools-scm.readthedocs.io/
Project-URL: repository, https://github.com/pypa/setuptools-scm/
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Topic :: Software Development :: Libraries
Classifier: Topic :: Software Development :: Version Control
Classifier: Topic :: System :: Software Distribution
Classifier: Topic :: Utilities
Requires-Python: >=3.8
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: packaging>=20
Requires-Dist: setuptools
Requires-Dist: tomli>=1; python_version < "3.11"
Requires-Dist: typing-extensions; python_version < "3.10"
Requires-Dist: importlib-metadata>=4.6; python_version < "3.10"
Provides-Extra: docs
Requires-Dist: entangled-cli~=2.0; extra == "docs"
Requires-Dist: mkdocs; extra == "docs"
Requires-Dist: mkdocs-entangled-plugin; extra == "docs"
Requires-Dist: mkdocs-include-markdown-plugin; extra == "docs"
Requires-Dist: mkdocs-material; extra == "docs"
Requires-Dist: mkdocstrings[python]; extra == "docs"
Requires-Dist: pygments; extra == "docs"
Provides-Extra: rich
Requires-Dist: rich; extra == "rich"
Provides-Extra: test
Requires-Dist: build; extra == "test"
Requires-Dist: pytest; extra == "test"
Requires-Dist: rich; extra == "test"
Requires-Dist: typing-extensions; python_version < "3.11" and extra == "test"
Requires-Dist: wheel; extra == "test"
Provides-Extra: toml
Dynamic: license-file
# setuptools-scm
[![github ci](https://github.com/pypa/setuptools-scm/actions/workflows/python-tests.yml/badge.svg)](https://github.com/pypa/setuptools-scm/actions/workflows/python-tests.yml)
[![Documentation Status](https://readthedocs.org/projects/setuptools-scm/badge/?version=latest)](https://setuptools-scm.readthedocs.io/en/latest/?badge=latest)
[![tidelift](https://tidelift.com/badges/package/pypi/setuptools-scm) ](https://tidelift.com/subscription/pkg/pypi-setuptools-scm?utm_source=pypi-setuptools-scm&utm_medium=readme)
## about
[setuptools-scm] extracts Python package versions from `git` or `hg` metadata
instead of declaring them as the version argument
or in an SCM-managed file.
Additionally [setuptools-scm] provides `setuptools` with a list of
files that are managed by the SCM
<br/>
(i.e. it automatically adds all the SCM-managed files to the sdist).
<br/>
Unwanted files must be excluded via `MANIFEST.in`
or [configuring Git archive][git-archive-docs].
## `pyproject.toml` usage
The preferred way to configure [setuptools-scm] is to author
settings in a `tool.setuptools_scm` section of `pyproject.toml`.
This feature requires setuptools 61 or later.
First, ensure that [setuptools-scm] is present during the project's
build step by specifying it as one of the build requirements.
```toml title="pyproject.toml"
[build-system]
requires = ["setuptools>=64", "setuptools-scm>=8"]
build-backend = "setuptools.build_meta"
```
That will be sufficient to require [setuptools-scm] for projects
that support [PEP 518] like [pip] and [build].
[pip]: https://pypi.org/project/pip
[build]: https://pypi.org/project/build
[PEP 518]: https://peps.python.org/pep-0518/
To enable version inference, you need to set the version
dynamically in the `project` section of `pyproject.toml`:
```toml title="pyproject.toml"
[project]
# version = "0.0.1" # Remove any existing version parameter.
dynamic = ["version"]
[tool.setuptools_scm]
```
Additionally, a version file can be written by specifying:
```toml title="pyproject.toml"
[tool.setuptools_scm]
version_file = "pkg/_version.py"
```
Where `pkg` is the name of your package.
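As a hedged sketch, the generated file can then be imported at runtime (the module path follows the `version_file` setting above):
```python
# Runtime access to the version written by setuptools-scm.
# Assumes the version_file = "pkg/_version.py" configuration shown above.
from pkg._version import version as __version__
```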
If you need to confirm which version string is being generated or debug the configuration,
you can install [setuptools-scm] directly in your working environment and run:
```console
$ python -m setuptools_scm
# To explore other options, try:
$ python -m setuptools_scm --help
```
For further configuration see the [documentation].
[setuptools-scm]: https://github.com/pypa/setuptools-scm
[documentation]: https://setuptools-scm.readthedocs.io/
[git-archive-docs]: https://setuptools-scm.readthedocs.io/en/stable/usage/#builtin-mechanisms-for-obtaining-version-numbers
## Interaction with Enterprise Distributions
Some enterprise distributions like RHEL7
ship rather old setuptools versions.
In those cases it's typically possible to build by using an sdist against `setuptools-scm<2.0`.
As those old setuptools versions lack sensible types for versions,
modern [setuptools-scm] is unable to support them sensibly.
It's strongly recommended to build a wheel artifact using modern Python and setuptools,
then install that artifact instead of trying to run against old setuptools versions.
## Code of Conduct
Everyone interacting in the [setuptools-scm] project's codebases, issue
trackers, chat rooms, and mailing lists is expected to follow the
[PSF Code of Conduct].
[PSF Code of Conduct]: https://github.com/pypa/.github/blob/main/CODE_OF_CONDUCT.md
## Security Contact
To report a security vulnerability, please use the
[Tidelift security contact](https://tidelift.com/security).
Tidelift will coordinate the fix and disclosure.

View File

@ -16,5 +16,9 @@
setuptools-80.9.0-py3-none-any.whl setuptools-80.9.0-py3-none-any.whl
</a> </a>
<br /> <br />
<a href="/setuptools/setuptools-67.1.0-py3-none-any.whl#sha256=a7687c12b444eaac951ea87a9627c4f904ac757e7abdc5aac32833234af90378" data-requires-python="&gt;=3.7" data-dist-info-metadata="sha256=286148a5442173faf79ca5aec07d58e4032d88a448099256ea59e50e5c20181b">
setuptools-67.1.0-py3-none-any.whl
</a>
<br />
</body> </body>
</html> </html>

Binary file not shown.

View File

@ -0,0 +1,137 @@
Metadata-Version: 2.1
Name: setuptools
Version: 67.1.0
Summary: Easily download, build, install, upgrade, and uninstall Python packages
Home-page: https://github.com/pypa/setuptools
Author: Python Packaging Authority
Author-email: distutils-sig@python.org
Project-URL: Documentation, https://setuptools.pypa.io/
Project-URL: Changelog, https://setuptools.pypa.io/en/stable/history.html
Keywords: CPAN PyPI distutils eggs package management
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: System :: Archiving :: Packaging
Classifier: Topic :: System :: Systems Administration
Classifier: Topic :: Utilities
Requires-Python: >=3.7
License-File: LICENSE
Provides-Extra: certs
Provides-Extra: docs
Requires-Dist: sphinx (>=3.5) ; extra == 'docs'
Requires-Dist: jaraco.packaging (>=9) ; extra == 'docs'
Requires-Dist: rst.linker (>=1.9) ; extra == 'docs'
Requires-Dist: furo ; extra == 'docs'
Requires-Dist: sphinx-lint ; extra == 'docs'
Requires-Dist: jaraco.tidelift (>=1.4) ; extra == 'docs'
Requires-Dist: pygments-github-lexers (==0.0.5) ; extra == 'docs'
Requires-Dist: sphinx-favicon ; extra == 'docs'
Requires-Dist: sphinx-inline-tabs ; extra == 'docs'
Requires-Dist: sphinx-reredirects ; extra == 'docs'
Requires-Dist: sphinxcontrib-towncrier ; extra == 'docs'
Requires-Dist: sphinx-notfound-page (==0.8.3) ; extra == 'docs'
Requires-Dist: sphinx-hoverxref (<2) ; extra == 'docs'
Provides-Extra: ssl
Provides-Extra: testing
Requires-Dist: pytest (>=6) ; extra == 'testing'
Requires-Dist: pytest-checkdocs (>=2.4) ; extra == 'testing'
Requires-Dist: flake8 (<5) ; extra == 'testing'
Requires-Dist: pytest-enabler (>=1.3) ; extra == 'testing'
Requires-Dist: pytest-perf ; extra == 'testing'
Requires-Dist: flake8-2020 ; extra == 'testing'
Requires-Dist: virtualenv (>=13.0.0) ; extra == 'testing'
Requires-Dist: wheel ; extra == 'testing'
Requires-Dist: pip (>=19.1) ; extra == 'testing'
Requires-Dist: jaraco.envs (>=2.2) ; extra == 'testing'
Requires-Dist: pytest-xdist ; extra == 'testing'
Requires-Dist: jaraco.path (>=3.2.0) ; extra == 'testing'
Requires-Dist: build[virtualenv] ; extra == 'testing'
Requires-Dist: filelock (>=3.4.0) ; extra == 'testing'
Requires-Dist: pip-run (>=8.8) ; extra == 'testing'
Requires-Dist: ini2toml[lite] (>=0.9) ; extra == 'testing'
Requires-Dist: tomli-w (>=1.0.0) ; extra == 'testing'
Requires-Dist: pytest-timeout ; extra == 'testing'
Provides-Extra: testing-integration
Requires-Dist: pytest ; extra == 'testing-integration'
Requires-Dist: pytest-xdist ; extra == 'testing-integration'
Requires-Dist: pytest-enabler ; extra == 'testing-integration'
Requires-Dist: virtualenv (>=13.0.0) ; extra == 'testing-integration'
Requires-Dist: tomli ; extra == 'testing-integration'
Requires-Dist: wheel ; extra == 'testing-integration'
Requires-Dist: jaraco.path (>=3.2.0) ; extra == 'testing-integration'
Requires-Dist: jaraco.envs (>=2.2) ; extra == 'testing-integration'
Requires-Dist: build[virtualenv] ; extra == 'testing-integration'
Requires-Dist: filelock (>=3.4.0) ; extra == 'testing-integration'
Requires-Dist: pytest-black (>=0.3.7) ; (platform_python_implementation != "PyPy") and extra == 'testing'
Requires-Dist: pytest-cov ; (platform_python_implementation != "PyPy") and extra == 'testing'
Requires-Dist: pytest-mypy (>=0.9.1) ; (platform_python_implementation != "PyPy") and extra == 'testing'
Requires-Dist: pytest-flake8 ; (python_version < "3.12") and extra == 'testing'
.. image:: https://img.shields.io/pypi/v/setuptools.svg
:target: https://pypi.org/project/setuptools
.. image:: https://img.shields.io/pypi/pyversions/setuptools.svg
.. image:: https://github.com/pypa/setuptools/workflows/tests/badge.svg
:target: https://github.com/pypa/setuptools/actions?query=workflow%3A%22tests%22
:alt: tests
.. image:: https://img.shields.io/badge/code%20style-black-000000.svg
:target: https://github.com/psf/black
:alt: Code style: Black
.. image:: https://img.shields.io/readthedocs/setuptools/latest.svg
:target: https://setuptools.pypa.io
.. image:: https://img.shields.io/badge/skeleton-2023-informational
:target: https://blog.jaraco.com/skeleton
.. image:: https://img.shields.io/codecov/c/github/pypa/setuptools/master.svg?logo=codecov&logoColor=white
:target: https://codecov.io/gh/pypa/setuptools
.. image:: https://tidelift.com/badges/github/pypa/setuptools?style=flat
:target: https://tidelift.com/subscription/pkg/pypi-setuptools?utm_source=pypi-setuptools&utm_medium=readme
.. image:: https://img.shields.io/discord/803025117553754132
:target: https://discord.com/channels/803025117553754132/815945031150993468
:alt: Discord
See the `Installation Instructions
<https://packaging.python.org/installing/>`_ in the Python Packaging
User's Guide for instructions on installing, upgrading, and uninstalling
Setuptools.
Questions and comments should be directed to `GitHub Discussions
<https://github.com/pypa/setuptools/discussions>`_.
Bug reports and especially tested patches may be
submitted directly to the `bug tracker
<https://github.com/pypa/setuptools/issues>`_.
Code of Conduct
===============
Everyone interacting in the setuptools project's codebases, issue trackers,
chat rooms, and fora is expected to follow the
`PSF Code of Conduct <https://github.com/pypa/.github/blob/main/CODE_OF_CONDUCT.md>`_.
For Enterprise
==============
Available as part of the Tidelift Subscription.
Setuptools and the maintainers of thousands of other packages are working with Tidelift to deliver one enterprise subscription that covers all of the open source you use.
`Learn more <https://tidelift.com/subscription/pkg/pypi-setuptools?utm_source=pypi-setuptools&utm_medium=referral&utm_campaign=github>`_.
Security Contact
================
To report a security vulnerability, please use the
`Tidelift security contact <https://tidelift.com/security>`_.
Tidelift will coordinate the fix and disclosure.
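The ``Requires-Dist`` entries above pair several dependencies with PEP 508 environment markers, so an installer only pulls a requirement when the marker evaluates true for the current interpreter and the requested extra. A minimal sketch of that evaluation using the ``packaging`` library (the marker string is copied from the metadata above; the script itself is illustrative, not part of setuptools):

.. code-block:: python

    from packaging.markers import Marker

    # Marker copied from the metadata above: pytest-flake8 is only needed
    # on Python < 3.12 and only when the 'testing' extra was requested.
    marker = Marker('(python_version < "3.12") and extra == "testing"')

    # Installers evaluate markers against the current environment plus the
    # extra being installed; 'extra' must be supplied explicitly.
    print(marker.evaluate({"extra": "testing"}))  # True on Python <= 3.11
    print(marker.evaluate({"extra": ""}))         # False: extra not requested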

View File

@ -0,0 +1,20 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for shippinglabel
</title>
</head>
<body>
<h1>
Links for shippinglabel
</h1>
<a href="/shippinglabel/shippinglabel-2.3.0-py3-none-any.whl#sha256=37811ae077f4a40e524efa013a136e3e2d295a96ad149a4e84b7788dd7f3b64a" data-requires-python="&gt;=3.7" data-dist-info-metadata="sha256=7bcc0acc49d2dbb7bbaf848bd89a455ad101692f6f5ddf45f1a22c767b03ff61">
shippinglabel-2.3.0-py3-none-any.whl
</a>
<br />
</body>
</html>
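Each anchor in these generated indexes carries the wheel's SHA-256 digest in the URL fragment and the supported Python range in ``data-requires-python``, as PEP 503 prescribes; clients read both before deciding to download. A short sketch of that parsing with the standard library (it assumes the page above has been saved locally as ``index.html``; illustrative only):

.. code-block:: python

    from html.parser import HTMLParser
    from urllib.parse import urlsplit

    class IndexParser(HTMLParser):
        """Collect (href, hash algo, digest, requires-python) from a PEP 503 page."""

        def __init__(self):
            super().__init__()
            self.links = []

        def handle_starttag(self, tag, attrs):
            if tag != "a":
                return
            attrs = dict(attrs)
            href = attrs.get("href", "")
            fragment = urlsplit(href).fragment  # e.g. "sha256=37811ae0..."
            algo, _, digest = fragment.partition("=")
            self.links.append((href, algo, digest, attrs.get("data-requires-python")))

    parser = IndexParser()
    with open("index.html", encoding="utf-8") as fh:
        parser.feed(fh.read())

    for href, algo, digest, requires in parser.links:
        filename = href.rsplit("/", 1)[-1].split("#")[0]
        print(filename, algo, digest[:12], requires)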

View File

@ -0,0 +1,198 @@
Metadata-Version: 2.4
Name: shippinglabel
Version: 2.3.0
Summary: Utilities for handling packages.
Project-URL: Homepage, https://github.com/domdfcoding/shippinglabel
Project-URL: Issue Tracker, https://github.com/domdfcoding/shippinglabel/issues
Project-URL: Source Code, https://github.com/domdfcoding/shippinglabel
Project-URL: Documentation, https://shippinglabel.readthedocs.io/en/latest
Author-email: Dominic Davis-Foster <dominic@davis-foster.co.uk>
License: Copyright (c) 2020-2022 Dominic Davis-Foster
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
OR OTHER DEALINGS IN THE SOFTWARE.
License-File: LICENSE
Keywords: conda,packaging,pypi,requirements
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: System :: Archiving :: Packaging
Classifier: Typing :: Typed
Requires-Python: >=3.7
Requires-Dist: dist-meta>=0.1.2
Requires-Dist: dom-toml>=0.2.2
Requires-Dist: domdf-python-tools>=3.1.0
Requires-Dist: packaging>=20.9
Requires-Dist: typing-extensions>=3.7.4.3
Description-Content-Type: text/x-rst
#############
shippinglabel
#############
.. start short_desc
**Utilities for handling packages.**
.. end short_desc
.. start shields
.. list-table::
:stub-columns: 1
:widths: 10 90
* - Docs
- |docs| |docs_check|
* - Tests
- |actions_linux| |actions_windows| |actions_macos| |coveralls|
* - PyPI
- |pypi-version| |supported-versions| |supported-implementations| |wheel|
* - Anaconda
- |conda-version| |conda-platform|
* - Activity
- |commits-latest| |commits-since| |maintained| |pypi-downloads|
* - QA
- |codefactor| |actions_flake8| |actions_mypy|
* - Other
- |license| |language| |requires|
.. |docs| image:: https://img.shields.io/readthedocs/shippinglabel/latest?logo=read-the-docs
:target: https://shippinglabel.readthedocs.io/en/latest
:alt: Documentation Build Status
.. |docs_check| image:: https://github.com/domdfcoding/shippinglabel/workflows/Docs%20Check/badge.svg
:target: https://github.com/domdfcoding/shippinglabel/actions?query=workflow%3A%22Docs+Check%22
:alt: Docs Check Status
.. |actions_linux| image:: https://github.com/domdfcoding/shippinglabel/workflows/Linux/badge.svg
:target: https://github.com/domdfcoding/shippinglabel/actions?query=workflow%3A%22Linux%22
:alt: Linux Test Status
.. |actions_windows| image:: https://github.com/domdfcoding/shippinglabel/workflows/Windows/badge.svg
:target: https://github.com/domdfcoding/shippinglabel/actions?query=workflow%3A%22Windows%22
:alt: Windows Test Status
.. |actions_macos| image:: https://github.com/domdfcoding/shippinglabel/workflows/macOS/badge.svg
:target: https://github.com/domdfcoding/shippinglabel/actions?query=workflow%3A%22macOS%22
:alt: macOS Test Status
.. |actions_flake8| image:: https://github.com/domdfcoding/shippinglabel/workflows/Flake8/badge.svg
:target: https://github.com/domdfcoding/shippinglabel/actions?query=workflow%3A%22Flake8%22
:alt: Flake8 Status
.. |actions_mypy| image:: https://github.com/domdfcoding/shippinglabel/workflows/mypy/badge.svg
:target: https://github.com/domdfcoding/shippinglabel/actions?query=workflow%3A%22mypy%22
:alt: mypy status
.. |requires| image:: https://dependency-dash.repo-helper.uk/github/domdfcoding/shippinglabel/badge.svg
:target: https://dependency-dash.repo-helper.uk/github/domdfcoding/shippinglabel/
:alt: Requirements Status
.. |coveralls| image:: https://img.shields.io/coveralls/github/domdfcoding/shippinglabel/master?logo=coveralls
:target: https://coveralls.io/github/domdfcoding/shippinglabel?branch=master
:alt: Coverage
.. |codefactor| image:: https://img.shields.io/codefactor/grade/github/domdfcoding/shippinglabel?logo=codefactor
:target: https://www.codefactor.io/repository/github/domdfcoding/shippinglabel
:alt: CodeFactor Grade
.. |pypi-version| image:: https://img.shields.io/pypi/v/shippinglabel
:target: https://pypi.org/project/shippinglabel/
:alt: PyPI - Package Version
.. |supported-versions| image:: https://img.shields.io/pypi/pyversions/shippinglabel?logo=python&logoColor=white
:target: https://pypi.org/project/shippinglabel/
:alt: PyPI - Supported Python Versions
.. |supported-implementations| image:: https://img.shields.io/pypi/implementation/shippinglabel
:target: https://pypi.org/project/shippinglabel/
:alt: PyPI - Supported Implementations
.. |wheel| image:: https://img.shields.io/pypi/wheel/shippinglabel
:target: https://pypi.org/project/shippinglabel/
:alt: PyPI - Wheel
.. |conda-version| image:: https://img.shields.io/conda/v/conda-forge/shippinglabel?logo=anaconda
:target: https://anaconda.org/conda-forge/shippinglabel
:alt: Conda - Package Version
.. |conda-platform| image:: https://img.shields.io/conda/pn/conda-forge/shippinglabel?label=conda%7Cplatform
:target: https://anaconda.org/conda-forge/shippinglabel
:alt: Conda - Platform
.. |license| image:: https://img.shields.io/github/license/domdfcoding/shippinglabel
:target: https://github.com/domdfcoding/shippinglabel/blob/master/LICENSE
:alt: License
.. |language| image:: https://img.shields.io/github/languages/top/domdfcoding/shippinglabel
:alt: GitHub top language
.. |commits-since| image:: https://img.shields.io/github/commits-since/domdfcoding/shippinglabel/v2.3.0
:target: https://github.com/domdfcoding/shippinglabel/pulse
:alt: GitHub commits since tagged version
.. |commits-latest| image:: https://img.shields.io/github/last-commit/domdfcoding/shippinglabel
:target: https://github.com/domdfcoding/shippinglabel/commit/master
:alt: GitHub last commit
.. |maintained| image:: https://img.shields.io/maintenance/yes/2025
:alt: Maintenance
.. |pypi-downloads| image:: https://img.shields.io/pypi/dm/shippinglabel
:target: https://pypi.org/project/shippinglabel/
:alt: PyPI - Downloads
.. end shields
Installation
--------------
.. start installation
``shippinglabel`` can be installed from PyPI or Anaconda.
To install with ``pip``:
.. code-block:: bash
$ python -m pip install shippinglabel
To install with ``conda``:
.. code-block:: bash
$ conda install -c conda-forge shippinglabel
.. end installation
``shippinglabel`` includes a vendored copy of `trove-classifiers <https://pypi.org/project/trove-classifiers/>`_.
If you install a newer version of ``trove-classifiers`` with pip, ``shippinglabel`` will use that version instead.
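That override works through the usual vendored-fallback import pattern: try the site-installed distribution first and fall back to the bundled copy only if it is missing. A minimal sketch of the pattern (the ``_vendor`` module path below is hypothetical, not shippinglabel's actual layout):

.. code-block:: python

    # Prefer a user-installed trove-classifiers; fall back to the vendored copy.
    # The "shippinglabel._vendor" path is hypothetical, for illustration only.
    try:
        from trove_classifiers import classifiers
    except ImportError:
        from shippinglabel._vendor.trove_classifiers import classifiers

    print(len(classifiers), "known trove classifiers")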

View File

@ -0,0 +1,20 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for simple503
</title>
</head>
<body>
<h1>
Links for simple503
</h1>
<a href="/simple503/simple503-0.4.0-py3-none-any.whl#sha256=01046e8e15392d7239a9df97128939caecd9d7699b9d1dd4517e37a058b67c8f" data-requires-python="&gt;=3.6" data-dist-info-metadata="sha256=e53037c70d835cb1643b352dd5ddf9026dde51eed4e0fc5065c507617473e91a">
simple503-0.4.0-py3-none-any.whl
</a>
<br />
</body>
</html>

Binary file not shown.

View File

@ -0,0 +1,183 @@
Metadata-Version: 2.1
Name: simple503
Version: 0.4.0
Summary: PEP 503 Python package repository generator.
Author-email: Dominic Davis-Foster <dominic@davis-foster.co.uk>
License: MIT
Keywords: pep503,pep658,pip,pypi
Home-page: https://github.com/repo-helper/simple503
Project-URL: Issue Tracker, https://github.com/repo-helper/simple503/issues
Project-URL: Source Code, https://github.com/repo-helper/simple503
Project-URL: Documentation, https://simple503.readthedocs.io/en/latest
Platform: Windows
Platform: macOS
Platform: Linux
Classifier: Development Status :: 3 - Alpha
Classifier: Environment :: Console
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: System :: Archiving :: Packaging
Classifier: Typing :: Typed
Requires-Python: >=3.6
Requires-Dist: airium>=0.2.2
Requires-Dist: apeye>=1.0.1
Requires-Dist: click>=8.0.1
Requires-Dist: consolekit>=1.2.2
Requires-Dist: dataclasses>=0.8; python_version == "3.6"
Requires-Dist: dist-meta>=0.1.0
Requires-Dist: dom-toml>=0.5.1
Requires-Dist: domdf-python-tools>=2.9.1
Requires-Dist: natsort>=7.1.1
Requires-Dist: shippinglabel>=0.15.0
Requires-Dist: typing-extensions>=3.7.4.1
Requires-Dist: beautifulsoup4>=4.9.3; extra == 'incremental'
Requires-Dist: beautifulsoup4>=4.9.3; extra == 'all'
Provides-Extra: incremental
Provides-Extra: all
Description-Content-Type: text/x-rst
==========
simple503
==========
.. start short_desc
**PEP 503 Python package repository generator.**
.. end short_desc
``simple503`` generates a static, `PEP 503`_ simple repository of Python distributions.
It takes a directory of Python `wheels`_ and generates the necessary directories and ``index.html`` files.
The source directory can optionally be pre-sorted by project name, or ``simple503`` can do this for you.
An example repository can be seen at https://repo-helper.uk/simple503/
.. _PEP 503: https://www.python.org/dev/peps/pep-0503/
.. _wheels: https://www.python.org/dev/peps/pep-0427/
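The layout it produces is the one PEP 503 mandates: one directory per normalized project name, each holding an ``index.html`` that links every distribution file for that project. A minimal sketch of the normalize-and-group step behind that layout (standard library only; it illustrates the structure and is not simple503's implementation):

.. code-block:: python

    import re
    from collections import defaultdict
    from pathlib import Path

    def normalize(name: str) -> str:
        """PEP 503 name normalization: runs of -, _, . become a single hyphen."""
        return re.sub(r"[-_.]+", "-", name).lower()

    def group_wheels(wheel_dir: str) -> dict[str, list[Path]]:
        """Group *.whl files by normalized project name (first '-' field)."""
        groups: dict[str, list[Path]] = defaultdict(list)
        for wheel in Path(wheel_dir).glob("*.whl"):
            project = wheel.name.split("-", 1)[0]
            groups[normalize(project)].append(wheel)
        return groups

    # Each key becomes a /<name>/ directory; each value lists the anchors
    # that belong in that directory's index.html.
    for name, files in group_wheels("wheels").items():
        print(f"/{name}/index.html ->", [f.name for f in files])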
.. start shields
.. list-table::
:stub-columns: 1
:widths: 10 90
* - Docs
- |docs| |docs_check|
* - Tests
- |actions_linux| |actions_windows| |actions_macos| |coveralls|
* - PyPI
- |pypi-version| |supported-versions| |supported-implementations| |wheel|
* - Activity
- |commits-latest| |commits-since| |maintained| |pypi-downloads|
* - QA
- |codefactor| |actions_flake8| |actions_mypy|
* - Other
- |license| |language| |requires|
.. |docs| image:: https://img.shields.io/readthedocs/simple503/latest?logo=read-the-docs
:target: https://simple503.readthedocs.io/en/latest
:alt: Documentation Build Status
.. |docs_check| image:: https://github.com/repo-helper/simple503/workflows/Docs%20Check/badge.svg
:target: https://github.com/repo-helper/simple503/actions?query=workflow%3A%22Docs+Check%22
:alt: Docs Check Status
.. |actions_linux| image:: https://github.com/repo-helper/simple503/workflows/Linux/badge.svg
:target: https://github.com/repo-helper/simple503/actions?query=workflow%3A%22Linux%22
:alt: Linux Test Status
.. |actions_windows| image:: https://github.com/repo-helper/simple503/workflows/Windows/badge.svg
:target: https://github.com/repo-helper/simple503/actions?query=workflow%3A%22Windows%22
:alt: Windows Test Status
.. |actions_macos| image:: https://github.com/repo-helper/simple503/workflows/macOS/badge.svg
:target: https://github.com/repo-helper/simple503/actions?query=workflow%3A%22macOS%22
:alt: macOS Test Status
.. |actions_flake8| image:: https://github.com/repo-helper/simple503/workflows/Flake8/badge.svg
:target: https://github.com/repo-helper/simple503/actions?query=workflow%3A%22Flake8%22
:alt: Flake8 Status
.. |actions_mypy| image:: https://github.com/repo-helper/simple503/workflows/mypy/badge.svg
:target: https://github.com/repo-helper/simple503/actions?query=workflow%3A%22mypy%22
:alt: mypy status
.. |requires| image:: https://dependency-dash.repo-helper.uk/github/repo-helper/simple503/badge.svg
:target: https://dependency-dash.repo-helper.uk/github/repo-helper/simple503/
:alt: Requirements Status
.. |coveralls| image:: https://img.shields.io/coveralls/github/repo-helper/simple503/master?logo=coveralls
:target: https://coveralls.io/github/repo-helper/simple503?branch=master
:alt: Coverage
.. |codefactor| image:: https://img.shields.io/codefactor/grade/github/repo-helper/simple503?logo=codefactor
:target: https://www.codefactor.io/repository/github/repo-helper/simple503
:alt: CodeFactor Grade
.. |pypi-version| image:: https://img.shields.io/pypi/v/simple503
:target: https://pypi.org/project/simple503/
:alt: PyPI - Package Version
.. |supported-versions| image:: https://img.shields.io/pypi/pyversions/simple503?logo=python&logoColor=white
:target: https://pypi.org/project/simple503/
:alt: PyPI - Supported Python Versions
.. |supported-implementations| image:: https://img.shields.io/pypi/implementation/simple503
:target: https://pypi.org/project/simple503/
:alt: PyPI - Supported Implementations
.. |wheel| image:: https://img.shields.io/pypi/wheel/simple503
:target: https://pypi.org/project/simple503/
:alt: PyPI - Wheel
.. |license| image:: https://img.shields.io/github/license/repo-helper/simple503
:target: https://github.com/repo-helper/simple503/blob/master/LICENSE
:alt: License
.. |language| image:: https://img.shields.io/github/languages/top/repo-helper/simple503
:alt: GitHub top language
.. |commits-since| image:: https://img.shields.io/github/commits-since/repo-helper/simple503/v0.4.0
:target: https://github.com/repo-helper/simple503/pulse
:alt: GitHub commits since tagged version
.. |commits-latest| image:: https://img.shields.io/github/last-commit/repo-helper/simple503
:target: https://github.com/repo-helper/simple503/commit/master
:alt: GitHub last commit
.. |maintained| image:: https://img.shields.io/maintenance/yes/2023
:alt: Maintenance
.. |pypi-downloads| image:: https://img.shields.io/pypi/dm/simple503
:target: https://pypi.org/project/simple503/
:alt: PyPI - Downloads
.. end shields
Installation
--------------
.. start installation
``simple503`` can be installed from PyPI.
To install with ``pip``:
.. code-block:: bash
$ python -m pip install simple503
.. end installation

Some files were not shown because too many files have changed in this diff.