Compare commits
No commits in common. "4.5.0" and "4.1.0" have entirely different histories.
VERSION (6 changes)

@@ -1,3 +1,3 @@
-4.5.0
-last_version: 4.4.0
-source_branch: feature/mirror-update-pr-10
+4.1.0
+last_version: 4.0.0
+source_branch: feature/SIENTIAPDE-1379
git-apply-patch.sh (new file)

@@ -0,0 +1,134 @@
#!/bin/bash

# Applies current directory content as a patch to an existing git repository
#
# Usage: ./git-apply-patch.sh <config-file>
#
# Config file format:
#   GIT_URL=https://github.com/user/repo.git
#   GIT_USER=username
#   GIT_TOKEN=your_token_or_password
#
# VERSION file format:
#   <new_version>
#   last_version: <previous_version>
#   source_branch: <branch_name>   (optional)

set -e

if [ -z "$1" ]; then
    echo "Usage: $0 <config-file>"
    exit 1
fi

CONFIG_FILE="$1"

if [ ! -f "$CONFIG_FILE" ]; then
    echo "Error: Config file '$CONFIG_FILE' not found"
    exit 1
fi

# Load config file
source "$CONFIG_FILE"

# Validate required fields
if [ -z "$GIT_URL" ]; then
    echo "Error: GIT_URL not defined in config file"
    exit 1
fi

if [ -z "$GIT_USER" ]; then
    echo "Error: GIT_USER not defined in config file"
    exit 1
fi

if [ -z "$GIT_TOKEN" ]; then
    echo "Error: GIT_TOKEN not defined in config file"
    exit 1
fi

# Read VERSION file
if [ ! -f "VERSION" ]; then
    echo "Error: VERSION file not found in current directory"
    exit 1
fi

NEW_VERSION=$(sed -n '1p' VERSION)
LAST_VERSION=$(sed -n '2p' VERSION | sed 's/last_version: //')
SOURCE_BRANCH=$(sed -n '3p' VERSION | sed 's/source_branch: //')

if [ -z "$NEW_VERSION" ]; then
    echo "Error: New version not found in VERSION file (line 1)"
    exit 1
fi

if [ -z "$LAST_VERSION" ]; then
    echo "Error: last_version not found in VERSION file (line 2)"
    exit 1
fi

echo "New version: $NEW_VERSION"
echo "Last version: $LAST_VERSION"
echo "Source branch: ${SOURCE_BRANCH:-N/A}"

# Build authenticated URL
if [[ "$GIT_URL" == https://* ]]; then
    URL_WITHOUT_PROTOCOL="${GIT_URL#https://}"
    AUTH_URL="https://${GIT_USER}:${GIT_TOKEN}@${URL_WITHOUT_PROTOCOL}"
elif [[ "$GIT_URL" == http://* ]]; then
    URL_WITHOUT_PROTOCOL="${GIT_URL#http://}"
    AUTH_URL="http://${GIT_USER}:${GIT_TOKEN}@${URL_WITHOUT_PROTOCOL}"
else
    echo "Error: URL must start with http:// or https://"
    exit 1
fi

# Create temp directory for cloning
TEMP_DIR=$(mktemp -d)
CURRENT_DIR=$(pwd)

echo ""
echo "Cloning repository from branch '$LAST_VERSION'..."
git clone --branch "$LAST_VERSION" --single-branch "$AUTH_URL" "$TEMP_DIR" 2>/dev/null || {
    echo "Branch '$LAST_VERSION' not found, trying to clone default branch..."
    git clone "$AUTH_URL" "$TEMP_DIR"
    cd "$TEMP_DIR"
    git checkout -b "$LAST_VERSION" 2>/dev/null || git checkout "$LAST_VERSION"
    cd "$CURRENT_DIR"
}

echo "Copying current files to cloned repository..."
# Copy all files except .git, the test directory, the temp clone dir, and the config file
rsync -av \
    --exclude='.git' \
    --exclude='test' \
    --exclude="$(basename "$TEMP_DIR")" \
    --exclude="$CONFIG_FILE" \
    "$CURRENT_DIR/" "$TEMP_DIR/"

cd "$TEMP_DIR"

echo "Creating new branch '$NEW_VERSION'..."
git checkout -b "$NEW_VERSION" 2>/dev/null || git checkout "$NEW_VERSION"

echo "Adding changes..."
git add -A

echo "Committing changes..."
COMMIT_MSG="Release $NEW_VERSION"
if [ -n "$SOURCE_BRANCH" ]; then
    COMMIT_MSG="$COMMIT_MSG (from $SOURCE_BRANCH)"
fi
git commit -m "$COMMIT_MSG" || echo "Nothing to commit"

echo "Pushing to remote..."
git push -u origin "$NEW_VERSION"

# Cleanup
cd "$CURRENT_DIR"
rm -rf "$TEMP_DIR"

echo ""
echo "Done! Patch applied and pushed to branch '$NEW_VERSION'"
echo "Based on previous version: '$LAST_VERSION'"
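A minimal end-to-end run could look like this (the file name config.env and all of its values here are illustrative, not taken from the diff):

    # config.env (illustrative values)
    GIT_URL=https://git.example.com/org/repo.git
    GIT_USER=ci-bot
    GIT_TOKEN=<personal-access-token>

    # run from the directory that contains the new content and the VERSION file
    ./git-apply-patch.sh config.env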
Config file template (new file)

@@ -0,0 +1,8 @@
# Git repository configuration
# Copy this file and fill with your credentials
# NEVER commit this file with real credentials!

GIT_URL=http://localhost:35703/aignosi/library-distribution-mirror.git
GIT_USER=aignosi
GIT_TOKEN=r8sA8CPHD9!bt6d
git-init-from-config.sh

@@ -1,53 +1,62 @@
 #!/bin/bash
 
-# Initializes a git repository and pushes to a remote using credentials from arguments
+# Initializes a git repository and pushes to a remote using credentials from a config file
 #
-# Usage: ./git-init-from-config.sh <git-url> <git-user> <git-token>
+# Usage: ./git-init-from-config.sh <config-file>
 #
-# Arguments:
-#   git-url   - Repository URL (e.g., https://gitea.example.com/user/repo.git)
-#   git-user  - Git username
-#   git-token - Git token or password
+# Config file format (one value per line):
+#   GIT_URL=https://github.com/user/repo.git
+#   GIT_USER=username
+#   GIT_TOKEN=your_token_or_password
 #
 # The branch name will be read from the VERSION file (first line)
 
 set -e
 
-if [ "$#" -lt 3 ]; then
-    echo "Usage: $0 <git-url> <git-user> <git-token>"
+if [ -z "$1" ]; then
+    echo "Usage: $0 <config-file>"
     echo ""
-    echo "Arguments:"
-    echo "  git-url   - Repository URL (e.g., https://gitea.example.com/user/repo.git)"
-    echo "  git-user  - Git username"
-    echo "  git-token - Git token or password"
+    echo "Config file format:"
+    echo "  GIT_URL=https://github.com/user/repo.git"
+    echo "  GIT_USER=username"
+    echo "  GIT_TOKEN=your_token_or_password"
+    echo "  GIT_BRANCH=main (optional)"
     exit 1
 fi
 
-GIT_URL="$1"
-GIT_USER="$2"
-GIT_TOKEN="$3"
+CONFIG_FILE="$1"
+
+if [ ! -f "$CONFIG_FILE" ]; then
+    echo "Error: Config file '$CONFIG_FILE' not found"
+    exit 1
+fi
+
+# Load config file
+source "$CONFIG_FILE"
+
+# Validate required fields
 if [ -z "$GIT_URL" ]; then
-    echo "Error: git-url is required"
+    echo "Error: GIT_URL not defined in config file"
     exit 1
 fi
 
 if [ -z "$GIT_USER" ]; then
-    echo "Error: git-user is required"
+    echo "Error: GIT_USER not defined in config file"
     exit 1
 fi
 
 if [ -z "$GIT_TOKEN" ]; then
-    echo "Error: git-token is required"
+    echo "Error: GIT_TOKEN not defined in config file"
     exit 1
 fi
 
 # Read version from VERSION file (first line)
 if [ ! -f "VERSION" ]; then
     echo "Error: VERSION file not found in current directory"
     exit 1
 fi
 
-GIT_BRANCH=$(head -n 1 VERSION | tr -d '[:space:]')
+GIT_BRANCH=$(head -n 1 VERSION)
 
 if [ -z "$GIT_BRANCH" ]; then
     echo "Error: VERSION file is empty"

@@ -56,6 +65,8 @@ fi
 
 echo "Version detected: $GIT_BRANCH"
 
+# Build authenticated URL
+# Extract protocol and rest of URL
 if [[ "$GIT_URL" == https://* ]]; then
     URL_WITHOUT_PROTOCOL="${GIT_URL#https://}"
     AUTH_URL="https://${GIT_USER}:${GIT_TOKEN}@${URL_WITHOUT_PROTOCOL}"

@@ -88,3 +99,4 @@ git push -u origin "$GIT_BRANCH"
 
 echo ""
 echo "Done! Repository pushed to $GIT_URL on branch $GIT_BRANCH"
+
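Both scripts read the same config file, so a first push and later patch releases can share it (invocation illustrative):

    ./git-init-from-config.sh config.env   # first release: init repo, push branch named after VERSION line 1
    ./git-apply-patch.sh config.env        # later releases: branch off last_version, apply content, push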
Binary file not shown.
airium 0.2.7 METADATA (file deleted)

@@ -1,590 +0,0 @@
Metadata-Version: 2.4
Name: airium
Version: 0.2.7
Summary: Easy and quick html builder with natural syntax correspondence (python->html). No templates needed. Serves pure pythonic library with no dependencies.
Home-page: https://gitlab.com/kamichal/airium
Author: Michał Kaczmarczyk
Author-email: michal.s.kaczmarczyk@gmail.com
Maintainer: Michał Kaczmarczyk
Maintainer-email: michal.s.kaczmarczyk@gmail.com
License: MIT
Keywords: natural html generator compiler template-less
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Information Technology
Classifier: Intended Audience :: Science/Research
Classifier: Intended Audience :: System Administrators
Classifier: Intended Audience :: Telecommunications Industry
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Programming Language :: Python
Classifier: Topic :: Database :: Front-Ends
Classifier: Topic :: Documentation
Classifier: Topic :: Internet :: WWW/HTTP :: Browsers
Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content
Classifier: Topic :: Internet :: WWW/HTTP
Classifier: Topic :: Scientific/Engineering :: Visualization
Classifier: Topic :: Software Development :: Code Generators
Classifier: Topic :: Text Processing :: Markup :: HTML
Classifier: Topic :: Utilities
Description-Content-Type: text/markdown
License-File: LICENSE
Provides-Extra: dev
Requires-Dist: check-manifest; extra == "dev"
Requires-Dist: flake8~=7.1; extra == "dev"
Requires-Dist: mypy~=1.10; extra == "dev"
Requires-Dist: pytest-cov~=3.0; extra == "dev"
Requires-Dist: pytest-mock~=3.6; extra == "dev"
Requires-Dist: pytest~=6.2; extra == "dev"
Requires-Dist: types-beautifulsoup4~=4.12; extra == "dev"
Requires-Dist: types-requests~=2.32; extra == "dev"
Provides-Extra: parse
Requires-Dist: requests<3,>=2.12.0; extra == "parse"
Requires-Dist: beautifulsoup4<5.0,>=4.10.0; extra == "parse"
Dynamic: author
Dynamic: author-email
Dynamic: classifier
Dynamic: description
Dynamic: description-content-type
Dynamic: home-page
Dynamic: keywords
Dynamic: license
Dynamic: license-file
Dynamic: maintainer
Dynamic: maintainer-email
Dynamic: provides-extra
Dynamic: summary

## Airium

Bidirectional `HTML`-`python` translator.

[](https://pypi.python.org/pypi/airium/)
[](https://gitlab.com/kamichal/airium/-/commits/master)
[](https://gitlab.com/kamichal/airium/-/commits/master)
[](https://pypi.org/project/airium/)
[](https://pypi.python.org/pypi/airium/)
[](https://pypi.python.org/pypi/airium/)

Key features:

- simple, straight-forward
- template-less (just python, you may say goodbye to all the templates)
- DOM structure is strictly represented by python indentation (with context-managers)
- gives much cleaner `HTML` than regular templates
- equipped with a reverse translator: `HTML` to python
- can output either pretty (default) or minified `HTML` code

# Generating `HTML` code in python using `airium`

#### Basic `HTML` page (hello world)

```python
from airium import Airium

a = Airium()

a('<!DOCTYPE html>')
with a.html(lang="pl"):
    with a.head():
        a.meta(charset="utf-8")
        a.title(_t="Airium example")

    with a.body():
        with a.h3(id="id23409231", klass='main_header'):
            a("Hello World.")

html = str(a)  # casting to string extracts the value
# or directly to UTF-8 encoded bytes:
html_bytes = bytes(a)  # casting to bytes is a shortcut to str(a).encode('utf-8')

print(html)
```

This prints the following string:

```html
<!DOCTYPE html>
<html lang="pl">
  <head>
    <meta charset="utf-8" />
    <title>Airium example</title>
  </head>
  <body>
    <h3 id="id23409231" class="main_header">
      Hello World.
    </h3>
  </body>
</html>
```

In order to store it as a file, just:

```python
with open('that/file/path.html', 'wb') as f:
    f.write(bytes(a))
```

#### Simple image in a div

```python
from airium import Airium

a = Airium()

with a.div():
    a.img(src='source.png', alt='alt text')
    a('the text')

html_str = str(a)
print(html_str)
```

```html
<div>
  <img src="source.png" alt="alt text"/>
  the text
</div>
```

#### Table

```python
from airium import Airium

a = Airium()

with a.table(id='table_372'):
    with a.tr(klass='header_row'):
        a.th(_t='no.')
        a.th(_t='Firstname')
        a.th(_t='Lastname')

    with a.tr():
        a.td(_t='1.')
        a.td(id='jbl', _t='Jill')
        a.td(_t='Smith')  # can use _t or text

    with a.tr():
        a.td(_t='2.')
        a.td(_t='Roland', id='rmd')
        a.td(_t='Mendel')

table_str = str(a)
print(table_str)

# To store it to a file:
with open('/tmp/airium_www.example.com.py', 'w') as f:
    f.write(table_str)
```

Now `table_str` contains the following string:

```html
<table id="table_372">
  <tr class="header_row">
    <th>no.</th>
    <th>Firstname</th>
    <th>Lastname</th>
  </tr>
  <tr>
    <td>1.</td>
    <td id="jbl">Jill</td>
    <td>Smith</td>
  </tr>
  <tr>
    <td>2.</td>
    <td id="rmd">Roland</td>
    <td>Mendel</td>
  </tr>
</table>
```

### Chaining shortcut for elements with only one child

_New in version 0.2.2_

Given a structure with a large number of `with` statements:

```python
from airium import Airium

a = Airium()

with a.article():
    with a.table():
        with a.thead():
            with a.tr():
                a.th(_t='Column 1')
                a.th(_t='Column 2')
        with a.tbody():
            with a.tr():
                with a.td():
                    a.strong(_t='Value 1')
                a.td(_t='Value 2')

table_str = str(a)
print(table_str)
```

you may use a chaining shortcut; the following is equivalent:

```python
from airium import Airium

a = Airium()

with a.article().table():
    with a.thead().tr():
        a.th(_t="Column 1")
        a.th(_t="Column 2")
    with a.tbody().tr():
        a.td().strong(_t="Value 1")
        a.td(_t="Value 2")

table_str = str(a)
print(table_str)
```

```html
<article>
  <table>
    <thead>
      <tr>
        <th>Column 1</th>
        <th>Column 2</th>
      </tr>
    </thead>
    <tbody>
      <tr>
        <td>
          <strong>Value 1</strong>
        </td>
        <td>Value 2</td>
      </tr>
    </tbody>
  </table>
</article>
```

# Options

### Pretty or Minify

By default, airium builds `HTML` code indented with spaces, using the line feed (`\n`) character for line breaks.
This can be changed when creating an `Airium` instance. All available arguments, with their default values, are:

```python
a = Airium(
    base_indent='  ',  # str
    current_level=0,  # int
    source_minify=False,  # bool
    source_line_break_character="\n",  # str
)
```

#### minify

In this mode the size of the code is minimized, i.e. it contains as little whitespace as possible.
The option can be enabled with the `source_minify` argument, i.e.:

```python
a = Airium(source_minify=True)
```

If you need to explicitly add a line break in the source code (not a `<br/>`):

```python
a = Airium(source_minify=True)
a.h1(_t="Here's your table")
with a.table():
    with a.tr():
        a.break_source_line()
        a.th(_t="Cell 11")
        a.th(_t="Cell 12")
    with a.tr():
        a.break_source_line()
        a.th(_t="Cell 21")
        a.th(_t="Cell 22")
a.break_source_line()
a.p(_t="Another content goes here")
```

This produces the following code:

```html
<h1>Here's your table</h1><table><tr>
<th>Cell 11</th><th>Cell 12</th></tr><tr>
<th>Cell 21</th><th>Cell 22</th></tr>
</table><p>Another content goes here</p>
```

Note that `break_source_line` cannot be used
in [context manager chains](#chaining-shortcut-for-elements-with-only-one-child).

#### indent style

By default, the generated HTML code is indented with two spaces per indent level.
You can change it to `\t` or 4 spaces via the `Airium` constructor argument, e.g.:

```python
a = Airium(base_indent="\t")  # one tab symbol
a = Airium(base_indent="    ")  # 4 spaces per indentation level
a = Airium(base_indent=" ")  # 1 space per level
# pick one of the above statements; it can be mixed with other arguments
```

Note that this setting is ignored when the `source_minify` argument is set to `True` (see above).

There is a special case: setting the base indent to an empty string disables indentation,
but line breaks are still added. To get rid of line breaks as well, check the `source_minify` argument.

#### indent level

The `current_level` argument is an integer that can be set to a non-negative
value, which makes `airium` start indenting with a level offset given by that number.
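A quick sketch of the effect (an illustrative addition, not from the original README; the exact output shape is inferred from the option's description):

```python
from airium import Airium

a = Airium(current_level=2)  # start two indent levels deep
with a.div():
    a.span(_t='offset example')

print(str(a))
# Expected shape: every line carries two extra base indents, roughly:
#     <div>
#       <span>offset example</span>
#     </div>
```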
#### line break character

By default, just a line feed (`\n`) is used to terminate lines of the generated code.
You can change it to a different style, e.g. `\r\n` or `\r`, by setting `source_line_break_character` to the desired value.

```python
a = Airium(source_line_break_character="\r\n")  # Windows style
```

Note that the setting has no effect when the `source_minify` argument is set to `True` (see above).

# Using airium with web-frameworks

Airium can be used with frameworks like Flask or Django. It can completely replace
template engines, reducing the scatter of code files, which may bring better code organization, among other benefits.

Here is an example of using airium with Django. It implements a reusable `basic_body` and a view called `index`.

```python
# file: your_app/views.py
import contextlib
import inspect

from airium import Airium
from django.http import HttpResponse


@contextlib.contextmanager
def basic_body(a: Airium, useful_name: str = ''):
    """Works like a Django/Ninja template."""

    a('<!DOCTYPE html>')
    with a.html(lang='en'):
        with a.head():
            a.meta(charset='utf-8')
            a.meta(content='width=device-width, initial-scale=1', name='viewport')
            # do not use CSS from this URL in a production, it's just for an educational purpose
            a.link(href='https://unpkg.com/@picocss/pico@1.4.1/css/pico.css', rel='stylesheet')
            a.title(_t='Hello World')

        with a.body():
            with a.div():
                with a.nav(klass='container-fluid'):
                    with a.ul():
                        with a.li():
                            with a.a(klass='contrast', href='./'):
                                a.strong(_t="⌨ Foo Bar")
                    with a.ul():
                        with a.li():
                            a.a(klass='contrast', href='#', **{'data-theme-switcher': 'auto'}, _t='Auto')
                        with a.li():
                            a.a(klass='contrast', href='#', **{'data-theme-switcher': 'light'}, _t='Light')
                        with a.li():
                            a.a(klass='contrast', href='#', **{'data-theme-switcher': 'dark'}, _t='Dark')

            with a.header(klass='container'):
                with a.hgroup():
                    a.h1(_t=f"You're on the {useful_name}")
                    a.h2(_t="It's a page made by our automatons with a power of steam engines.")

            with a.main(klass='container'):
                yield  # This is the point where main content gets inserted

            with a.footer(klass='container'):
                with a.small():
                    margin = 'margin: auto 10px;'
                    a.span(_t='© Airium HTML generator example', style=margin)

            # do not use JS from this URL in a production, it's just for an educational purpose
            a.script(src='https://picocss.com/examples/js/minimal-theme-switcher.js')


def index(request) -> HttpResponse:
    a = Airium()
    with basic_body(a, f'main page: {request.path}'):
        with a.article():
            a.h3(_t="Hello World from Django running Airium")
            with a.p().small():
                a("This bases on ")
                with a.a(href="https://picocss.com/examples/company/"):
                    a("Pico.css / Company example")

            with a.p():
                a("Instead of a HTML template, airium has been used.")
                a("The whole body is generated by a template "
                  "and the article code looks like that:")

            with a.code().pre():
                a(inspect.getsource(index))

    return HttpResponse(bytes(a))  # from django.http import HttpResponse
```

Route it in `urls.py` just like a regular view:

```python
# file: your_app/urls.py
from django.contrib import admin
from django.urls import path

from your_app import views

urlpatterns = [
    path('index/', views.index),
    path('admin/', admin.site.urls),
]
```

The resulting web page on my machine looks like this:



# Reverse translation

Airium is equipped with a transpiler `[HTML -> py]`.
It generates python code out of a given `HTML` string.

### Using reverse translator as a binary:

Ensure you have [installed](#installation) the `[parse]` extras. Then call on the command line:

```bash
airium http://www.example.com
```

That will fetch the document and translate it to python code.
The code calls `airium` statements that reproduce the given `HTML` document.
It may give a clue on how to define the `HTML` structure of a given
web page using the `airium` package.

To store the translation result in a file:

```bash
airium http://www.example.com > /tmp/airium_example_com.py
```

You can also parse local `HTML` files:

```bash
airium /path/to/your_file.html > /tmp/airium_my_file.py
```

You may also try to parse your Django templates. I'm not sure whether it works,
but there will probably not be much to fix.

### Using reverse translator as python code:

```python
from airium import from_html_to_airium

# assume we have such a page given as a string:
html_str = """\
<!DOCTYPE html>
<html lang="pl">
  <head>
    <meta charset="utf-8" />
    <title>Airium example</title>
  </head>
  <body>
    <h3 id="id23409231" class="main_header">
      Hello World.
    </h3>
  </body>
</html>
"""

# to convert the html into python, just call:

py_str = from_html_to_airium(html_str)

# airium tests ensure that the result of the conversion is equal to the string:
assert py_str == """\
#!/usr/bin/env python
# File generated by reverse AIRIUM translator (version 0.2.7).
# Any change will be overridden on next run.
# flake8: noqa E501 (line too long)

from airium import Airium

a = Airium()

a('<!DOCTYPE html>')
with a.html(lang='pl'):
    with a.head():
        a.meta(charset='utf-8')
        a.title(_t='Airium example')
    with a.body():
        a.h3(klass='main_header', id='id23409231', _t='Hello World.')
"""
```

### <a name="transpiler_limitations">Transpiler limitations</a>

> so far in version 0.2.2:

- the result of a translation does not keep the exact amount of leading whitespace
  within `<pre>` tags; it comes out over-indented in the python code.
  This is, however, not an issue when code is generated from python to `HTML`.

- although it keeps the proper tag structure, the transpiler does not
  chain all the `with` statements, so in some cases the generated
  code may be deeply indented.

- it's not too fast

# <a name="installation">Installation</a>

If you need a new virtual environment, call:

```bash
virtualenv venv
source venv/bin/activate
```

With it activated, you may install airium like this:

```bash
pip install airium
```

In order to use reverse translation, two additional packages are needed; run:

```bash
pip install airium[parse]
```

Then check if the transpiler works by calling:

```bash
airium --help
```

> Enjoy!
airium simple index page (file deleted)

@@ -1,20 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for airium
</title>
</head>
<body>
<h1>
Links for airium
</h1>
<a href="/airium/airium-0.2.7-py3-none-any.whl#sha256=35e3ae334327b17b7c2fc39bb57ab2c48171ca849f8cf3dff11437d1e054952e" data-dist-info-metadata="sha256=48022884c676a59c85113445ae9e14ad7f149808fb5d62c2660f8c4567489fe5">
airium-0.2.7-py3-none-any.whl
</a>
<br />
</body>
</html>

Binary file not shown.
apeye-core 1.1.5 METADATA (file deleted)

@@ -1,187 +0,0 @@
Metadata-Version: 2.1
Name: apeye-core
Version: 1.1.5
Summary: Core (offline) functionality for the apeye library.
Project-URL: Homepage, https://github.com/domdfcoding/apeye-core
Project-URL: Issue Tracker, https://github.com/domdfcoding/apeye-core/issues
Project-URL: Source Code, https://github.com/domdfcoding/apeye-core
Author-email: Dominic Davis-Foster <dominic@davis-foster.co.uk>
License: Copyright (c) 2022, Dominic Davis-Foster

        Redistribution and use in source and binary forms, with or without modification,
        are permitted provided that the following conditions are met:

        * Redistributions of source code must retain the above copyright notice,
          this list of conditions and the following disclaimer.
        * Redistributions in binary form must reproduce the above copyright notice,
          this list of conditions and the following disclaimer in the documentation
          and/or other materials provided with the distribution.
        * Neither the name of the copyright holder nor the names of its contributors
          may be used to endorse or promote products derived from this software without
          specific prior written permission.

        THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
        "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
        LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
        A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER
        OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
        EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
        PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
        PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
        LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
        NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
        SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
License-File: LICENSE
Keywords: url
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: BSD License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Typing :: Typed
Requires-Python: >=3.6.1
Requires-Dist: domdf-python-tools>=2.6.0
Requires-Dist: idna>=2.5
Description-Content-Type: text/x-rst

===========
apeye-core
===========

.. start short_desc

**Core (offline) functionality for the apeye library.**

.. end short_desc


.. start shields

.. list-table::
    :stub-columns: 1
    :widths: 10 90

    * - Tests
      - |actions_linux| |actions_windows| |actions_macos| |coveralls|
    * - PyPI
      - |pypi-version| |supported-versions| |supported-implementations| |wheel|
    * - Anaconda
      - |conda-version| |conda-platform|
    * - Activity
      - |commits-latest| |commits-since| |maintained| |pypi-downloads|
    * - QA
      - |codefactor| |actions_flake8| |actions_mypy|
    * - Other
      - |license| |language| |requires|

.. |actions_linux| image:: https://github.com/domdfcoding/apeye-core/workflows/Linux/badge.svg
    :target: https://github.com/domdfcoding/apeye-core/actions?query=workflow%3A%22Linux%22
    :alt: Linux Test Status

.. |actions_windows| image:: https://github.com/domdfcoding/apeye-core/workflows/Windows/badge.svg
    :target: https://github.com/domdfcoding/apeye-core/actions?query=workflow%3A%22Windows%22
    :alt: Windows Test Status

.. |actions_macos| image:: https://github.com/domdfcoding/apeye-core/workflows/macOS/badge.svg
    :target: https://github.com/domdfcoding/apeye-core/actions?query=workflow%3A%22macOS%22
    :alt: macOS Test Status

.. |actions_flake8| image:: https://github.com/domdfcoding/apeye-core/workflows/Flake8/badge.svg
    :target: https://github.com/domdfcoding/apeye-core/actions?query=workflow%3A%22Flake8%22
    :alt: Flake8 Status

.. |actions_mypy| image:: https://github.com/domdfcoding/apeye-core/workflows/mypy/badge.svg
    :target: https://github.com/domdfcoding/apeye-core/actions?query=workflow%3A%22mypy%22
    :alt: mypy status

.. |requires| image:: https://dependency-dash.repo-helper.uk/github/domdfcoding/apeye-core/badge.svg
    :target: https://dependency-dash.repo-helper.uk/github/domdfcoding/apeye-core/
    :alt: Requirements Status

.. |coveralls| image:: https://img.shields.io/coveralls/github/domdfcoding/apeye-core/master?logo=coveralls
    :target: https://coveralls.io/github/domdfcoding/apeye-core?branch=master
    :alt: Coverage

.. |codefactor| image:: https://img.shields.io/codefactor/grade/github/domdfcoding/apeye-core?logo=codefactor
    :target: https://www.codefactor.io/repository/github/domdfcoding/apeye-core
    :alt: CodeFactor Grade

.. |pypi-version| image:: https://img.shields.io/pypi/v/apeye-core
    :target: https://pypi.org/project/apeye-core/
    :alt: PyPI - Package Version

.. |supported-versions| image:: https://img.shields.io/pypi/pyversions/apeye-core?logo=python&logoColor=white
    :target: https://pypi.org/project/apeye-core/
    :alt: PyPI - Supported Python Versions

.. |supported-implementations| image:: https://img.shields.io/pypi/implementation/apeye-core
    :target: https://pypi.org/project/apeye-core/
    :alt: PyPI - Supported Implementations

.. |wheel| image:: https://img.shields.io/pypi/wheel/apeye-core
    :target: https://pypi.org/project/apeye-core/
    :alt: PyPI - Wheel

.. |conda-version| image:: https://img.shields.io/conda/v/conda-forge/apeye-core?logo=anaconda
    :target: https://anaconda.org/conda-forge/apeye-core
    :alt: Conda - Package Version

.. |conda-platform| image:: https://img.shields.io/conda/pn/conda-forge/apeye-core?label=conda%7Cplatform
    :target: https://anaconda.org/conda-forge/apeye-core
    :alt: Conda - Platform

.. |license| image:: https://img.shields.io/github/license/domdfcoding/apeye-core
    :target: https://github.com/domdfcoding/apeye-core/blob/master/LICENSE
    :alt: License

.. |language| image:: https://img.shields.io/github/languages/top/domdfcoding/apeye-core
    :alt: GitHub top language

.. |commits-since| image:: https://img.shields.io/github/commits-since/domdfcoding/apeye-core/v1.1.5
    :target: https://github.com/domdfcoding/apeye-core/pulse
    :alt: GitHub commits since tagged version

.. |commits-latest| image:: https://img.shields.io/github/last-commit/domdfcoding/apeye-core
    :target: https://github.com/domdfcoding/apeye-core/commit/master
    :alt: GitHub last commit

.. |maintained| image:: https://img.shields.io/maintenance/yes/2024
    :alt: Maintenance

.. |pypi-downloads| image:: https://img.shields.io/pypi/dm/apeye-core
    :target: https://pypi.org/project/apeye-core/
    :alt: PyPI - Downloads

.. end shields

Installation
--------------

.. start installation

``apeye-core`` can be installed from PyPI or Anaconda.

To install with ``pip``:

.. code-block:: bash

    $ python -m pip install apeye-core

To install with ``conda``:

.. code-block:: bash

    $ conda install -c conda-forge apeye-core

.. end installation
apeye-core simple index page (file deleted)

@@ -1,20 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for apeye-core
</title>
</head>
<body>
<h1>
Links for apeye-core
</h1>
<a href="/apeye-core/apeye_core-1.1.5-py3-none-any.whl#sha256=dc27a93f8c9e246b3b238c5ea51edf6115ab2618ef029b9f2d9a190ec8228fbf" data-requires-python=">=3.6.1" data-dist-info-metadata="sha256=751bbcd20a27f156c12183849bc78419fbac8ca5a51c29fb5137e01e6aeb5e78">
apeye_core-1.1.5-py3-none-any.whl
</a>
<br />
</body>
</html>

Binary file not shown.
apeye 1.4.1 METADATA (file deleted)

@@ -1,210 +0,0 @@
Metadata-Version: 2.1
Name: apeye
Version: 1.4.1
Summary: Handy tools for working with URLs and APIs.
Keywords: api,cache,requests,rest,url
Author-email: Dominic Davis-Foster <dominic@davis-foster.co.uk>
Requires-Python: >=3.6.1
Description-Content-Type: text/x-rst
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Internet :: WWW/HTTP
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Typing :: Typed
Requires-Dist: apeye-core>=1.0.0b2
Requires-Dist: domdf-python-tools>=2.6.0
Requires-Dist: platformdirs>=2.3.0
Requires-Dist: requests>=2.24.0
Requires-Dist: cachecontrol[filecache]>=0.12.6 ; extra == "all"
Requires-Dist: lockfile>=0.12.2 ; extra == "all"
Requires-Dist: cachecontrol[filecache]>=0.12.6 ; extra == "limiter"
Requires-Dist: lockfile>=0.12.2 ; extra == "limiter"
Project-URL: Documentation, https://apeye.readthedocs.io/en/latest
Project-URL: Homepage, https://github.com/domdfcoding/apeye
Project-URL: Issue Tracker, https://github.com/domdfcoding/apeye/issues
Project-URL: Source Code, https://github.com/domdfcoding/apeye
Provides-Extra: all
Provides-Extra: limiter

======
apeye
======

.. start short_desc

**Handy tools for working with URLs and APIs.**

.. end short_desc


.. start shields

.. list-table::
    :stub-columns: 1
    :widths: 10 90

    * - Docs
      - |docs| |docs_check|
    * - Tests
      - |actions_linux| |actions_windows| |actions_macos| |coveralls|
    * - PyPI
      - |pypi-version| |supported-versions| |supported-implementations| |wheel|
    * - Anaconda
      - |conda-version| |conda-platform|
    * - Activity
      - |commits-latest| |commits-since| |maintained| |pypi-downloads|
    * - QA
      - |codefactor| |actions_flake8| |actions_mypy|
    * - Other
      - |license| |language| |requires|

.. |docs| image:: https://img.shields.io/readthedocs/apeye/latest?logo=read-the-docs
    :target: https://apeye.readthedocs.io/en/latest
    :alt: Documentation Build Status

.. |docs_check| image:: https://github.com/domdfcoding/apeye/workflows/Docs%20Check/badge.svg
    :target: https://github.com/domdfcoding/apeye/actions?query=workflow%3A%22Docs+Check%22
    :alt: Docs Check Status

.. |actions_linux| image:: https://github.com/domdfcoding/apeye/workflows/Linux/badge.svg
    :target: https://github.com/domdfcoding/apeye/actions?query=workflow%3A%22Linux%22
    :alt: Linux Test Status

.. |actions_windows| image:: https://github.com/domdfcoding/apeye/workflows/Windows/badge.svg
    :target: https://github.com/domdfcoding/apeye/actions?query=workflow%3A%22Windows%22
    :alt: Windows Test Status

.. |actions_macos| image:: https://github.com/domdfcoding/apeye/workflows/macOS/badge.svg
    :target: https://github.com/domdfcoding/apeye/actions?query=workflow%3A%22macOS%22
    :alt: macOS Test Status

.. |actions_flake8| image:: https://github.com/domdfcoding/apeye/workflows/Flake8/badge.svg
    :target: https://github.com/domdfcoding/apeye/actions?query=workflow%3A%22Flake8%22
    :alt: Flake8 Status

.. |actions_mypy| image:: https://github.com/domdfcoding/apeye/workflows/mypy/badge.svg
    :target: https://github.com/domdfcoding/apeye/actions?query=workflow%3A%22mypy%22
    :alt: mypy status

.. |requires| image:: https://dependency-dash.repo-helper.uk/github/domdfcoding/apeye/badge.svg
    :target: https://dependency-dash.repo-helper.uk/github/domdfcoding/apeye/
    :alt: Requirements Status

.. |coveralls| image:: https://img.shields.io/coveralls/github/domdfcoding/apeye/master?logo=coveralls
    :target: https://coveralls.io/github/domdfcoding/apeye?branch=master
    :alt: Coverage

.. |codefactor| image:: https://img.shields.io/codefactor/grade/github/domdfcoding/apeye?logo=codefactor
    :target: https://www.codefactor.io/repository/github/domdfcoding/apeye
    :alt: CodeFactor Grade

.. |pypi-version| image:: https://img.shields.io/pypi/v/apeye
    :target: https://pypi.org/project/apeye/
    :alt: PyPI - Package Version

.. |supported-versions| image:: https://img.shields.io/pypi/pyversions/apeye?logo=python&logoColor=white
    :target: https://pypi.org/project/apeye/
    :alt: PyPI - Supported Python Versions

.. |supported-implementations| image:: https://img.shields.io/pypi/implementation/apeye
    :target: https://pypi.org/project/apeye/
    :alt: PyPI - Supported Implementations

.. |wheel| image:: https://img.shields.io/pypi/wheel/apeye
    :target: https://pypi.org/project/apeye/
    :alt: PyPI - Wheel

.. |conda-version| image:: https://img.shields.io/conda/v/domdfcoding/apeye?logo=anaconda
    :target: https://anaconda.org/domdfcoding/apeye
    :alt: Conda - Package Version

.. |conda-platform| image:: https://img.shields.io/conda/pn/domdfcoding/apeye?label=conda%7Cplatform
    :target: https://anaconda.org/domdfcoding/apeye
    :alt: Conda - Platform

.. |license| image:: https://img.shields.io/github/license/domdfcoding/apeye
    :target: https://github.com/domdfcoding/apeye/blob/master/LICENSE
    :alt: License

.. |language| image:: https://img.shields.io/github/languages/top/domdfcoding/apeye
    :alt: GitHub top language

.. |commits-since| image:: https://img.shields.io/github/commits-since/domdfcoding/apeye/v1.4.1
    :target: https://github.com/domdfcoding/apeye/pulse
    :alt: GitHub commits since tagged version

.. |commits-latest| image:: https://img.shields.io/github/last-commit/domdfcoding/apeye
    :target: https://github.com/domdfcoding/apeye/commit/master
    :alt: GitHub last commit

.. |maintained| image:: https://img.shields.io/maintenance/yes/2023
    :alt: Maintenance

.. |pypi-downloads| image:: https://img.shields.io/pypi/dm/apeye
    :target: https://pypi.org/project/apeye/
    :alt: PyPI - Downloads

.. end shields

``apeye`` provides:

* ``pathlib.Path``\-like objects to represent URLs
* a JSON-backed cache decorator for functions
* a CacheControl_ adapter to limit the rate of requests

See `the documentation`_ for more details.

.. _CacheControl: https://github.com/ionrock/cachecontrol
.. _the documentation: https://apeye.readthedocs.io/en/latest/api/cache.html
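(An illustrative sketch, not part of the original file: ``apeye``'s URL type composes path segments with the division operator, ``pathlib``-style.)

.. code-block:: python

    from apeye.url import URL

    api = URL("https://api.example.com")
    print(api / "v1" / "users")  # https://api.example.com/v1/users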
Installation
--------------

.. start installation

``apeye`` can be installed from PyPI or Anaconda.

To install with ``pip``:

.. code-block:: bash

    $ python -m pip install apeye

To install with ``conda``:

* First add the required channels

.. code-block:: bash

    $ conda config --add channels https://conda.anaconda.org/conda-forge
    $ conda config --add channels https://conda.anaconda.org/domdfcoding

* Then install

.. code-block:: bash

    $ conda install apeye

.. end installation


.. attention::

    In v0.9.0 and above the ``rate_limiter`` module requires the ``limiter`` extra to be installed:

    .. code-block:: bash

        $ python -m pip install apeye[limiter]
apeye simple index page (file deleted)

@@ -1,20 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for apeye
</title>
</head>
<body>
<h1>
Links for apeye
</h1>
<a href="/apeye/apeye-1.4.1-py3-none-any.whl#sha256=44e58a9104ec189bf42e76b3a7fe91e2b2879d96d48e9a77e5e32ff699c9204e" data-requires-python=">=3.6.1" data-dist-info-metadata="sha256=c76bd745f0ea8d7105ed23a0827bf960cd651e8e071dbdeb62946a390ddf86c1">
apeye-1.4.1-py3-none-any.whl
</a>
<br />
</body>
</html>

Binary file not shown.
boto3 METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: boto3
-Version: 1.41.5
+Version: 1.41.4
 Summary: The AWS SDK for Python
 Home-page: https://github.com/boto/boto3
 Author: Amazon Web Services

@@ -22,7 +22,7 @@ Classifier: Programming Language :: Python :: 3.14
 Requires-Python: >= 3.9
 License-File: LICENSE
 License-File: NOTICE
-Requires-Dist: botocore (<1.42.0,>=1.41.5)
+Requires-Dist: botocore (<1.42.0,>=1.41.4)
 Requires-Dist: jmespath (<2.0.0,>=0.7.1)
 Requires-Dist: s3transfer (<0.16.0,>=0.15.0)
 Provides-Extra: crt
boto3 simple index page

@@ -12,8 +12,8 @@
 <h1>
  Links for boto3
 </h1>
-<a href="/boto3/boto3-1.41.5-py3-none-any.whl#sha256=bb278111bfb4c33dca8342bda49c9db7685e43debbfa00cc2a5eb854dd54b745" data-requires-python=">= 3.9" data-dist-info-metadata="sha256=329053bf9a9139cc670ba9b8557fe3e7400b57d3137514c9baf0c3209ac04d1f">
- boto3-1.41.5-py3-none-any.whl
+<a href="/boto3/boto3-1.41.4-py3-none-any.whl#sha256=77d84b7ce890a9b0c6a8993f8de106d8cf8138f332a4685e6de453965e60cb24" data-requires-python=">= 3.9" data-dist-info-metadata="sha256=c263458fb50f5617ae8ff675602ce4b3eedbbaa5c7b7ccf58a72adc50ea35e56">
+ boto3-1.41.4-py3-none-any.whl
 </a>
 <br />
 </body>

Binary file not shown.
botocore METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: botocore
-Version: 1.41.5
+Version: 1.41.4
 Summary: Low-level, data-driven core of boto 3.
 Home-page: https://github.com/boto/botocore
 Author: Amazon Web Services
botocore simple index page

@@ -12,8 +12,8 @@
 <h1>
  Links for botocore
 </h1>
-<a href="/botocore/botocore-1.41.5-py3-none-any.whl#sha256=3fef7fcda30c82c27202d232cfdbd6782cb27f20f8e7e21b20606483e66ee73a" data-requires-python=">= 3.9" data-dist-info-metadata="sha256=867c86c9f400df83088bb210e49402344febc90aa6b10d46a0cd02642ae1096c">
- botocore-1.41.5-py3-none-any.whl
+<a href="/botocore/botocore-1.41.4-py3-none-any.whl#sha256=7143ef845f1d1400dbbf05d999f8c5e8cfaecd6bd84cbfbe5fa0a40e3a9f6353" data-requires-python=">= 3.9" data-dist-info-metadata="sha256=c54e339761f3067ceebafb82873edf4713098abb5ad00b802f347104b1200a04">
+ botocore-1.41.4-py3-none-any.whl
 </a>
 <br />
 <a href="/botocore/botocore-1.40.70-py3-none-any.whl#sha256=4a394ad25f5d9f1ef0bed610365744523eeb5c22de6862ab25d8c93f9f6d295c" data-requires-python=">= 3.9" data-dist-info-metadata="sha256=ff124fb918cb0210e04c2c4396cb3ad31bbe26884306bf4d35b9535ece1feb27">

Binary file not shown.
certifi 2025.11.12 METADATA (file deleted)

@@ -1,78 +0,0 @@
Metadata-Version: 2.4
Name: certifi
Version: 2025.11.12
Summary: Python package for providing Mozilla's CA Bundle.
Home-page: https://github.com/certifi/python-certifi
Author: Kenneth Reitz
Author-email: me@kennethreitz.com
License: MPL-2.0
Project-URL: Source, https://github.com/certifi/python-certifi
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)
Classifier: Natural Language :: English
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
Requires-Python: >=3.7
License-File: LICENSE
Dynamic: author
Dynamic: author-email
Dynamic: classifier
Dynamic: description
Dynamic: home-page
Dynamic: license
Dynamic: license-file
Dynamic: project-url
Dynamic: requires-python
Dynamic: summary

Certifi: Python SSL Certificates
================================

Certifi provides Mozilla's carefully curated collection of Root Certificates for
validating the trustworthiness of SSL certificates while verifying the identity
of TLS hosts. It has been extracted from the `Requests`_ project.

Installation
------------

``certifi`` is available on PyPI. Simply install it with ``pip``::

    $ pip install certifi

Usage
-----

To reference the installed certificate authority (CA) bundle, you can use the
built-in function::

    >>> import certifi

    >>> certifi.where()
    '/usr/local/lib/python3.7/site-packages/certifi/cacert.pem'

Or from the command line::

    $ python -m certifi
    /usr/local/lib/python3.7/site-packages/certifi/cacert.pem

Enjoy!

.. _`Requests`: https://requests.readthedocs.io/en/master/
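A common companion pattern (an illustrative addition, not part of the original README) is handing the bundle to the standard library's ``ssl`` module::

    >>> import ssl
    >>> import certifi

    >>> context = ssl.create_default_context(cafile=certifi.where())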
Addition/Removal of Certificates
--------------------------------

Certifi does not support any addition/removal or other modification of the
CA trust store content. This project is intended to provide a reliable and
highly portable root of trust to python deployments. Look to upstream projects
for methods to use alternate trust.
certifi simple index page (file deleted)

@@ -1,20 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for certifi
</title>
</head>
<body>
<h1>
Links for certifi
</h1>
<a href="/certifi/certifi-2025.11.12-py3-none-any.whl#sha256=97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b" data-requires-python=">=3.7" data-dist-info-metadata="sha256=fc9a6b1aeff595649d1e5aee44129ca2b5e7adfbc10e1bd7ffa291afc1d06cb7">
certifi-2025.11.12-py3-none-any.whl
</a>
<br />
</body>
</html>

Binary file not shown.
@ -1,764 +0,0 @@
|
|||
Metadata-Version: 2.4
|
||||
Name: charset-normalizer
|
||||
Version: 3.4.4
|
||||
Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.
|
||||
Author-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
|
||||
Maintainer-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
|
||||
License: MIT
|
||||
Project-URL: Changelog, https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md
|
||||
Project-URL: Documentation, https://charset-normalizer.readthedocs.io/
|
||||
Project-URL: Code, https://github.com/jawah/charset_normalizer
|
||||
Project-URL: Issue tracker, https://github.com/jawah/charset_normalizer/issues
|
||||
Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect
|
||||
Classifier: Development Status :: 5 - Production/Stable
|
||||
Classifier: Intended Audience :: Developers
|
||||
Classifier: Operating System :: OS Independent
|
||||
Classifier: Programming Language :: Python
|
||||
Classifier: Programming Language :: Python :: 3
|
||||
Classifier: Programming Language :: Python :: 3.7
|
||||
Classifier: Programming Language :: Python :: 3.8
|
||||
Classifier: Programming Language :: Python :: 3.9
|
||||
Classifier: Programming Language :: Python :: 3.10
|
||||
Classifier: Programming Language :: Python :: 3.11
|
||||
Classifier: Programming Language :: Python :: 3.12
|
||||
Classifier: Programming Language :: Python :: 3.13
|
||||
Classifier: Programming Language :: Python :: 3.14
|
||||
Classifier: Programming Language :: Python :: 3 :: Only
|
||||
Classifier: Programming Language :: Python :: Implementation :: CPython
|
||||
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
||||
Classifier: Topic :: Text Processing :: Linguistic
|
||||
Classifier: Topic :: Utilities
|
||||
Classifier: Typing :: Typed
|
||||
Requires-Python: >=3.7
|
||||
Description-Content-Type: text/markdown
|
||||
License-File: LICENSE
|
||||
Provides-Extra: unicode-backport
|
||||
Dynamic: license-file
|
||||
|
||||
<h1 align="center">Charset Detection, for Everyone 👋</h1>
|
||||
|
||||
<p align="center">
|
||||
<sup>The Real First Universal Charset Detector</sup><br>
|
||||
<a href="https://pypi.org/project/charset-normalizer">
|
||||
<img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
|
||||
</a>
|
||||
<a href="https://pepy.tech/project/charset-normalizer/">
|
||||
<img alt="Download Count Total" src="https://static.pepy.tech/badge/charset-normalizer/month" />
|
||||
</a>
|
||||
<a href="https://bestpractices.coreinfrastructure.org/projects/7297">
|
||||
<img src="https://bestpractices.coreinfrastructure.org/projects/7297/badge">
|
||||
</a>
|
||||
</p>
|
||||
<p align="center">
|
||||
<sup><i>Featured Packages</i></sup><br>
|
||||
<a href="https://github.com/jawah/niquests">
|
||||
<img alt="Static Badge" src="https://img.shields.io/badge/Niquests-Most_Advanced_HTTP_Client-cyan">
|
||||
</a>
|
||||
<a href="https://github.com/jawah/wassima">
|
||||
<img alt="Static Badge" src="https://img.shields.io/badge/Wassima-Certifi_Replacement-cyan">
|
||||
</a>
|
||||
</p>
|
||||
<p align="center">
|
||||
<sup><i>In other language (unofficial port - by the community)</i></sup><br>
|
||||
<a href="https://github.com/nickspring/charset-normalizer-rs">
|
||||
<img alt="Static Badge" src="https://img.shields.io/badge/Rust-red">
|
||||
</a>
|
||||
</p>
|
||||
|
||||
> A library that helps you read text from an unknown charset encoding.<br /> Motivated by `chardet`,
|
||||
> I'm trying to resolve the issue by taking a new approach.
|
||||
> All IANA character set names for which the Python core library provides codecs are supported.
|
||||
|
||||
<p align="center">
|
||||
>>>>> <a href="https://charsetnormalizerweb.ousret.now.sh" target="_blank">👉 Try Me Online Now, Then Adopt Me 👈 </a> <<<<<
|
||||
</p>
|
||||
|
||||
This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.
|
||||
|
||||
| Feature                                          | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |
|--------------------------------------------------|:---------------------------------------------:|:------------------:|:-----------------------------------------------:|
| `Fast`                                           | ❌ | ✅ | ✅ |
| `Universal**`                                    | ❌ | ✅ | ❌ |
| `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ |
| `Reliable` **with** distinguishable standards    | ✅ | ✅ | ✅ |
| `License`                                        | LGPL-2.1<br>_restrictive_ | MIT | MPL-1.1<br>_restrictive_ |
| `Native Python`                                  | ✅ | ✅ | ❌ |
| `Detect spoken language`                         | ❌ | ✅ | N/A |
| `UnicodeDecodeError Safety`                      | ❌ | ✅ | ❌ |
| `Whl Size (min)`                                 | 193.6 kB | 42 kB | ~200 kB |
| `Supported Encoding`                             | 33 | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |

<p align="center">
<img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>
</p>

*\*\* : They clearly use specific code for a specific encoding, even if it covers most of the encodings in use.*<br>

## ⚡ Performance

This package offers better performance than its counterpart Chardet. Here are some numbers.

| Package                                       | Accuracy | Mean per file (ms) | File per sec (est) |
|-----------------------------------------------|:--------:|:------------------:|:------------------:|
| [chardet](https://github.com/chardet/chardet) |   86 %   |       63 ms        |    16 file/sec     |
| charset-normalizer                            | **98 %** |     **10 ms**      |    100 file/sec    |

| Package                                       | 99th percentile | 95th percentile | 50th percentile |
|-----------------------------------------------|:---------------:|:---------------:|:---------------:|
| [chardet](https://github.com/chardet/chardet) |     265 ms      |      71 ms      |      7 ms       |
| charset-normalizer                            |     100 ms      |      50 ms      |      5 ms       |

_Updated as of December 2024 using CPython 3.12._

Chardet's performance on larger files (1MB+) is very poor. Expect a huge difference on large payloads.

> Stats are generated using 400+ files with default parameters. For more details on the files used, see the GHA workflows.
> And yes, these results might change at any time. The dataset can be updated to include more files.
> The actual delays depend heavily on your CPU capabilities. The factors should remain the same.
> Keep in mind that the stats are generous and that Chardet accuracy vs ours is measured using Chardet's initial capability
> (e.g. Supported Encodings). Challenge them if you want.

## ✨ Installation

Using pip:

```sh
pip install charset-normalizer -U
```

## 🚀 Basic Usage

### CLI
This package comes with a CLI.

```
usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
                  file [file ...]

The Real First Universal Charset Detector. Discover originating encoding used
on text file. Normalize text to unicode.

positional arguments:
  files                 File(s) to be analysed

optional arguments:
  -h, --help            show this help message and exit
  -v, --verbose         Display complementary information about file if any.
                        Stdout will contain logs about the detection process.
  -a, --with-alternative
                        Output complementary possibilities if any. Top-level
                        JSON WILL be a list.
  -n, --normalize       Permit to normalize input file. If not set, program
                        does not write anything.
  -m, --minimal         Only output the charset detected to STDOUT. Disabling
                        JSON output.
  -r, --replace         Replace file when trying to normalize it instead of
                        creating a new one.
  -f, --force           Replace file without asking if you are sure, use this
                        flag with caution.
  -t THRESHOLD, --threshold THRESHOLD
                        Define a custom maximum amount of chaos allowed in
                        decoded content. 0. <= chaos <= 1.
  --version             Show version information and exit.
```

```bash
normalizer ./data/sample.1.fr.srt
```

or

```bash
python -m charset_normalizer ./data/sample.1.fr.srt
```

🎉 Since version 1.4.0 the CLI produces an easily usable stdout result in JSON format.

```json
{
    "path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
    "encoding": "cp1252",
    "encoding_aliases": [
        "1252",
        "windows_1252"
    ],
    "alternative_encodings": [
        "cp1254",
        "cp1256",
        "cp1258",
        "iso8859_14",
        "iso8859_15",
        "iso8859_16",
        "iso8859_3",
        "iso8859_9",
        "latin_1",
        "mbcs"
    ],
    "language": "French",
    "alphabets": [
        "Basic Latin",
        "Latin-1 Supplement"
    ],
    "has_sig_or_bom": false,
    "chaos": 0.149,
    "coherence": 97.152,
    "unicode_path": null,
    "is_preferred": true
}
```

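If you want to consume that report from a script rather than the terminal, a minimal sketch could look like the following (it assumes `normalizer` is installed and on your `PATH`; the file path is illustrative):

```python
import json
import subprocess

# Minimal sketch: run the CLI and parse its JSON stdout.
completed = subprocess.run(
    ["normalizer", "./data/sample.1.fr.srt"],
    capture_output=True,
    text=True,
    check=True,
)

report = json.loads(completed.stdout)
print(report["encoding"], report["language"])
```
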
### Python
*Just print out normalized text*

```python
from charset_normalizer import from_path

results = from_path('./my_subtitle.srt')

print(str(results.best()))
```

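If you need more than the text itself, you can inspect the best match instead of only printing it. A short sketch (the payload is illustrative; the fields mirror the JSON report shown earlier):

```python
from charset_normalizer import from_bytes

# Illustrative payload: Cyrillic text encoded with a single-byte code page.
payload = "Всеки човек има право на образование.".encode("cp1251")

best_guess = from_bytes(payload).best()

if best_guess is None:
    print("no suitable encoding found")
else:
    print(best_guess.encoding)   # detected codec (or a compatible one)
    print(best_guess.language)   # inferred language, when detectable
    print(str(best_guess))       # the decoded Unicode text
```
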
*Upgrade your code without effort*

```python
from charset_normalizer import detect
```

The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.

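As a sketch of that drop-in usage (the sample bytes are illustrative), `detect` returns the same dict shape chardet users expect:

```python
from charset_normalizer import detect

raw = "Bonjour, où êtes-vous ?".encode("cp1252")

# Same dict shape as chardet: 'encoding', 'language' and 'confidence' keys.
print(detect(raw))
```
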
See the docs for advanced usage: [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)

## 😇 Why

When I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a
reliable alternative using a completely different method. Also! I never back down from a good challenge!

I **don't care** about the **originating charset** encoding, because **two different tables** can
produce **two identical rendered strings.**
What I want is to get readable text, the best I can.

In a way, **I'm brute forcing text decoding.** How cool is that? 😎

Don't confuse the package **ftfy** with charset-normalizer or chardet. ftfy's goal is to repair Unicode strings, whereas charset-normalizer's is to convert a raw file in an unknown encoding to Unicode.

## 🍰 How

- Discard all charset encoding tables that could not fit the binary content.
- Measure the noise, or the mess, once opened (by chunks) with a corresponding charset encoding.
- Extract the matches with the lowest mess detected.
- Additionally, we measure coherence / probe for a language.

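To make those steps concrete, here is a deliberately naive sketch of the same pipeline. It is only an illustration of the idea above, **not** how charset-normalizer is actually implemented; the candidate list and the mess measure are placeholders:

```python
from typing import Optional

CANDIDATES = ["utf_8", "cp1252", "cp1251", "latin_1"]  # illustrative subset

def naive_detect(payload: bytes) -> Optional[str]:
    scored = []
    for codec in CANDIDATES:
        try:
            text = payload.decode(codec)  # discard tables that cannot fit
        except UnicodeDecodeError:
            continue
        # Stand-in "mess" measure: ratio of unprintable characters.
        unprintable = sum(not c.isprintable() and c not in "\r\n\t" for c in text)
        scored.append((unprintable / max(len(text), 1), codec))
    # Keep the match with the lowest mess.
    return min(scored)[1] if scored else None
```
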
**Wait a minute**, what are noise/mess and coherence according to **YOU?**

*Noise:* I opened hundreds of text files, **written by humans**, with the wrong encoding table. **I observed**, then
**I established** some ground rules about **what is obvious** when **it seems like** a mess (aka. defining noise in rendered text).
I know that my interpretation of what is noise is probably incomplete; feel free to contribute in order to
improve or rewrite it.

*Coherence:* For each language on earth, we have computed ranked letter-appearance occurrences (as best we can). So I thought
that intel is worth something here, and I use those records against the decoded text to check if I can detect intelligent design.

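As a toy illustration of that idea (the reference ranking below is a rough placeholder, not the package's real frequency data):

```python
from collections import Counter

ENGLISH_RANKING = "etaoinshrdlcumwfgypbvkjxqz"  # rough, illustrative ordering

def coherence_score(text: str, reference: str = ENGLISH_RANKING) -> float:
    letters = [c for c in text.lower() if c.isalpha()]
    if not letters:
        return 0.0
    observed = [letter for letter, _ in Counter(letters).most_common()]
    # Crude agreement measure: how many of the text's most frequent letters
    # also sit in the top half of the reference ranking.
    top = observed[: len(reference) // 2]
    hits = sum(1 for letter in top if letter in reference[: len(reference) // 2])
    return hits / len(top)
```
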
## ⚡ Known limitations

- Language detection is unreliable when the text contains two or more languages sharing identical letters. (e.g. HTML (English tags) + Turkish content (sharing Latin characters))
- Every charset detector heavily depends on sufficient content. In common cases, do not bother running detection on very tiny content; see the sketch below.

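A trivial guard, as a sketch (the 32-byte threshold is an arbitrary illustrative choice, not a recommendation from the package):

```python
from charset_normalizer import from_bytes

def safe_best(payload: bytes):
    # Arbitrary illustrative threshold: skip detection on tiny payloads,
    # where any detector is unreliable.
    if len(payload) < 32:
        return None
    return from_bytes(payload).best()
```
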
## ⚠️ About Python EOLs

**If you are running:**

- Python >=2.7,<3.5: Unsupported
- Python 3.5: charset-normalizer < 2.1
- Python 3.6: charset-normalizer < 3.1
- Python 3.7: charset-normalizer < 4.0

Upgrade your Python interpreter as soon as possible.

## 👤 Contributing

Contributions, issues and feature requests are very much welcome.<br />
Feel free to check the [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.

## 📝 License

Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).<br />
This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.

Character frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/)

## 💼 For Enterprise

Professional support for charset-normalizer is available as part of the [Tidelift
Subscription][1]. Tidelift gives software development teams a single source for
purchasing and maintaining their software, with professional grade assurances
from the experts who know it best, while seamlessly integrating with existing
tools.

[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme

[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/7297/badge)](https://www.bestpractices.dev/projects/7297)

# Changelog
All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## [3.4.4](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.4) (2025-10-13)

### Changed
- Bound `setuptools` to a specific constraint `setuptools>=68,<=81`.
- Raised the upper bound of mypyc for the optional pre-built extension to v1.18.2

### Removed
- `setuptools-scm` as a build dependency.

### Misc
- Enforced hashes in `dev-requirements.txt` and created `ci-requirements.txt` for security purposes.
- Additional pre-built wheels for riscv64, s390x, and armv7l architectures.
- Restore `multiple.intoto.jsonl` in GitHub releases in addition to the individual attestation file per wheel.

## [3.4.3](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.3) (2025-08-09)

### Changed
- mypy(c) is no longer a required dependency at build time if `CHARSET_NORMALIZER_USE_MYPYC` isn't set to `1`. (#595) (#583)
- Automatically lower confidence on small bytes samples that are not Unicode in the legacy `detect` output. (#391)

### Added
- Custom build backend to overcome the inability to mark mypy as an optional dependency in the build phase.
- Support for Python 3.14

### Fixed
- sdist archive contained useless directories.
- Automatically fall back on valid UTF-16 or UTF-32 even if the md says it's noisy. (#633)

### Misc
- SBOMs are automatically published to the relevant GitHub release to comply with regulatory changes.
  Each published wheel comes with its SBOM. We chose CycloneDX as the format.
- Prebuilt optimized wheels are no longer distributed by default for CPython 3.7 due to a change in cibuildwheel.

## [3.4.2](https://github.com/Ousret/charset_normalizer/compare/3.4.1...3.4.2) (2025-05-02)

### Fixed
- Addressed the DeprecationWarning in our CLI regarding `argparse.FileType` by backporting the target class into the package. (#591)
- Improved the overall reliability of the detector with CJK Ideographs. (#605) (#587)

### Changed
- Optional mypyc compilation upgraded to version 1.15 for Python >= 3.8

## [3.4.1](https://github.com/Ousret/charset_normalizer/compare/3.4.0...3.4.1) (2024-12-24)

### Changed
- Project metadata are now stored in `pyproject.toml` instead of `setup.cfg`, using setuptools as the build backend.
- Enforce annotation delayed loading for simpler and more consistent types in the project.
- Optional mypyc compilation upgraded to version 1.14 for Python >= 3.8

### Added
- pre-commit configuration.
- noxfile.

### Removed
- `build-requirements.txt` as per using `pyproject.toml` native build configuration.
- `bin/integration.py` and `bin/serve.py` in favor of downstream integration tests (see noxfile).
- `setup.cfg` in favor of `pyproject.toml` metadata configuration.
- Unused `utils.range_scan` function.

### Fixed
- Converting content to Unicode bytes may insert `utf_8` instead of the preferred `utf-8`. (#572)
- Deprecation warning "'count' is passed as positional argument" when converting to Unicode bytes on Python 3.13+

## [3.4.0](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) (2024-10-08)

### Added
- Argument `--no-preemptive` in the CLI to prevent the detector from searching for hints.
- Support for Python 3.13 (#512)

### Fixed
- Relaxed the TypeError exception thrown when trying to compare a CharsetMatch with anything else than a CharsetMatch.
- Improved the general reliability of the detector based on user feedback. (#520) (#509) (#498) (#407) (#537)
- Declared charset in content (preemptive detection) not changed when converting to utf-8 bytes. (#381)

## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31)

### Fixed
- Unintentional memory usage regression when using large payloads that match several encodings (#376)
- Regression on some detection cases showcased in the documentation (#371)

### Added
- Noise (md) probe that identifies malformed Arabic representation due to the presence of letters in isolated form (credit to my wife)

## [3.3.1](https://github.com/Ousret/charset_normalizer/compare/3.3.0...3.3.1) (2023-10-22)

### Changed
- Optional mypyc compilation upgraded to version 1.6.1 for Python >= 3.8
- Improved the general detection reliability based on reports from the community

## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30)

### Added
- Allow executing the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer`
- Support for 9 forgotten encodings that are supported by Python but unlisted in `encoding.aliases` as they have no alias (#323)

### Removed
- (internal) Redundant utils.is_ascii function and unused function is_private_use_only
- (internal) charset_normalizer.assets is moved inside charset_normalizer.constant

### Changed
- (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection
- Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.8

### Fixed
- Unable to properly sort CharsetMatch when both chaos/noise and coherence were close due to an unreachable condition in \_\_lt\_\_ (#350)

## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)

### Changed
- Typehint for function `from_path` no longer enforces `PathLike` as its first argument
- Minor improvement over the global detection reliability

### Added
- Introduce function `is_binary` that relies on the main capabilities, optimized to detect binaries
- Propagate the `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp`, allowing deeper control over the detection (default True)
- Explicit support for Python 3.12

### Fixed
- Edge case detection failure where a file would contain a 'very-long' camel cased word (Issue #289)

## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)

### Added
- Argument `should_rename_legacy` for the legacy function `detect`; disregard any new arguments without errors (PR #262)

### Removed
- Support for Python 3.6 (PR #260)

### Changed
- Optional speedup provided by mypy/c 1.0.1

## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)

### Fixed
- Multi-byte cutter/chunk generator did not always cut correctly (PR #233)

### Changed
- Speedup provided by mypy/c 0.990 on Python >= 3.7

## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)

### Added
- Extended the capability of explain=True: when cp_isolation contains at most two entries (min one), it will log the details of the Mess-detector results
- Support for alternative language frequency sets in charset_normalizer.assets.FREQUENCIES
- Added parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
- `normalizer --version` now specifies if the current version provides the extra speedup (meaning a mypyc-compiled whl)

### Changed
- Build with static metadata using the 'build' frontend
- Make the language detection stricter
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup, up to 4x faster than v2.1

### Fixed
- CLI with opt --normalize failed when using a full path for files
- TooManyAccentuatedPlugin induced false positives on the mess detection when too few alpha characters had been fed to it
- Sphinx warnings when generating the documentation

### Removed
- Coherence detector no longer returns 'Simple English'; it returns 'English' instead
- Coherence detector no longer returns 'Classical Chinese'; it returns 'Chinese' instead
- Breaking: Methods `first()` and `best()` from CharsetMatch
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (it is unreliable/conflicts with ASCII)
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
- Breaking: Top-level function `normalize`
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
- Support for the backport `unicodedata2`

## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)

### Added
- Extended the capability of explain=True: when cp_isolation contains at most two entries (min one), it will log the details of the Mess-detector results
- Support for alternative language frequency sets in charset_normalizer.assets.FREQUENCIES
- Added parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio

### Changed
- Build with static metadata using the 'build' frontend
- Make the language detection stricter

### Fixed
- CLI with opt --normalize failed when using a full path for files
- TooManyAccentuatedPlugin induced false positives on the mess detection when too few alpha characters had been fed to it

### Removed
- Coherence detector no longer returns 'Simple English'; it returns 'English' instead
- Coherence detector no longer returns 'Classical Chinese'; it returns 'Chinese' instead

## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)

### Added
- `normalizer --version` now specifies if the current version provides the extra speedup (meaning a mypyc-compiled whl)

### Removed
- Breaking: Methods `first()` and `best()` from CharsetMatch
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (it is unreliable/conflicts with ASCII)

### Fixed
- Sphinx warnings when generating the documentation

## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)

### Changed
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup, up to 4x faster than v2.1

### Removed
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
- Breaking: Top-level function `normalize`
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
- Support for the backport `unicodedata2`

## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)

### Deprecated
- Function `normalize` scheduled for removal in 3.0

### Changed
- Removed a useless call to decode in fn is_unprintable (#206)

### Fixed
- Third-party library (i18n xgettext) crashing by not recognizing utf_8 (PEP 263) with underscore, from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)

## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)

### Added
- Output the Unicode table version when running the CLI with `--version` (PR #194)

### Changed
- Re-use the decoded buffer for single-byte character sets, from [@nijel](https://github.com/nijel) (PR #175)
- Fixed some performance bottlenecks, from [@deedy5](https://github.com/deedy5) (PR #183)

### Fixed
- Workaround for a potential bug in cpython: Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1, not acknowledged as a space (PR #175)
- CLI default threshold aligned with the API threshold, from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)

### Removed
- Support for Python 3.5 (PR #192)

### Deprecated
- Use of the backport unicodedata from `unicodedata2`, as Python is quickly catching up; scheduled for removal in 3.0 (PR #194)

## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)

### Fixed
- ASCII mis-detection in rare cases (PR #170)

## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)

### Added
- Explicit support for Python 3.11 (PR #164)

### Changed
- The logging behavior has been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165)

## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)

### Fixed
- Fallback match entries might lead to UnicodeDecodeError for large byte sequences (PR #154)

### Changed
- Skipping the language-detection (CD) on ASCII (PR #155)

## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)

### Changed
- Moderating the logging impact (since 2.0.8) for specific environments (PR #147)

### Fixed
- Wrong logging level applied when setting kwarg `explain` to True (PR #146)

## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)
### Changed
- Improvement over Vietnamese detection (PR #126)
- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)
- Efficiency improvements in cd/alphabet_languages, from [@adbar](https://github.com/adbar) (PR #122)
- Call sum() without an intermediary list, following PEP 289 recommendations, from [@adbar](https://github.com/adbar) (PR #129)
- Code style as refactored by Sourcery-AI (PR #131)
- Minor adjustment on the MD around European words (PR #133)
- Remove and replace SRTs from assets / tests (PR #139)
- Initialize the library logger with a `NullHandler` by default, from [@nmaynes](https://github.com/nmaynes) (PR #135)
- Setting kwarg `explain` to True will provisionally add (bounded to the function's lifespan) a specific stream handler (PR #135)

### Fixed
- Fix large (misleading) sequences giving UnicodeDecodeError (PR #137)
- Avoid using too-insignificant chunks (PR #137)

### Added
- Add and expose function `set_logging_handler` to configure a specific StreamHandler, from [@nmaynes](https://github.com/nmaynes) (PR #135)
- Add `CHANGELOG.md` entries; the format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)

## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)
### Added
- Add support for Kazakh (Cyrillic) language detection (PR #109)

### Changed
- Further improve inferring the language from a given single-byte code page (PR #112)
- Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116)
- Refactoring for potential performance improvements in loops, from [@adbar](https://github.com/adbar) (PR #113)
- Various detection improvements (MD+CD) (PR #117)

### Removed
- Remove redundant logging entry about detected language(s) (PR #115)

### Fixed
- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)

## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)
### Fixed
- Unforeseen regression with the loss of backward compatibility with some older minor versions of Python 3.5.x (PR #100)
- Fix CLI crash when using --minimal output in certain cases (PR #103)

### Changed
- Minor improvement to the detection efficiency (less than 1%) (PR #106 #101)

## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)
### Changed
- The project now complies with: flake8, mypy, isort and black to ensure better overall quality (PR #81)
- The BC support with v1.x was improved; the old staticmethods are restored (PR #82)
- The Unicode detection is slightly improved (PR #93)
- Add syntax sugar \_\_bool\_\_ for the results CharsetMatches list-container (PR #91)

### Removed
- The project no longer raises a warning on tiny content given for detection; it is simply logged as a warning instead (PR #92)

### Fixed
- In some rare cases, the chunk extractor could cut in the middle of a multi-byte character and mislead the mess detection (PR #95)
- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)
- The MANIFEST.in was not exhaustive (PR #78)

## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)
### Fixed
- The CLI no longer raises an unexpected exception when no encoding has been found (PR #70)
- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)
- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)
- Submatch factoring could be wrong in rare edge cases (PR #72)
- Multiple files given to the CLI were ignored when publishing results to STDOUT (after the first path) (PR #72)
- Fix line endings from CRLF to LF for certain project files (PR #67)

### Changed
- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)
- Allow fallback on a specified encoding if any (PR #71)

## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)
### Changed
- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63)
- According to the community's wishes, the detection will fall back on ASCII or UTF-8 as a last resort. (PR #64)

## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)
### Fixed
- Empty/too-small JSON payload mis-detection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59)

### Changed
- Don't inject unicodedata2 into sys.modules, from [@akx](https://github.com/akx) (PR #57)

## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)
### Fixed
- Make it work where there isn't a filesystem available, dropping assets frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)
- Using explain=False permanently disabled the verbose output in the current runtime (PR #47)
- One log entry (language target preemptive) was not shown in logs when using explain=True (PR #47)
- Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52)

### Changed
- Public function normalize's default argument values were not aligned with from_bytes (PR #53)

### Added
- You may now use charset aliases in the cp_isolation and cp_exclusion arguments (PR #47)

## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)
### Changed
- 4x to 5 times faster than the previous 1.4.0 release. At least 2x faster than Chardet.
- The accent has been put on UTF-8 detection; it should perform nearly instantaneously.
- The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible.
- The detection mechanism has been slightly improved; Turkish content is now detected correctly (most of the time)
- The program has been rewritten to ease readability and maintainability. (+ using static typing)
- utf_7 detection has been reinstated.

### Removed
- This package no longer requires anything when used with Python 3.5 (dropped cached_property)
- Removed support for these languages: Catalan, Esperanto, Kazakh, Basque, Volapük, Azeri, Galician, Nynorsk, Macedonian, and Serbocroatian.
- The exception hook on UnicodeDecodeError has been removed.

### Deprecated
- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0

### Fixed
- The CLI output used the relative path of the file(s). It should be absolute.

## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)
### Fixed
- Logger configuration/usage no longer conflicts with others (PR #44)

## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)
### Removed
- Using standard logging instead of the package loguru.
- Dropping the nose test framework in favor of the maintained pytest.
- Chose not to use the dragonmapper package to help with gibberish Chinese/CJK text.
- Require cached_property only for Python 3.5 due to constraints. Dropping it for every other interpreter version.
- Stop supporting UTF-7 that does not contain a SIG.
- Dropping PrettyTable, replaced with pure JSON output in the CLI.

### Fixed
- The BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present, due to the sub-match factoring process.
- Not searching properly for the BOM when trying the utf32/16 parent codec.

### Changed
- Improving the package's final size by compressing frequencies.json.
- Huge improvement over the largest payloads.

### Added
- The CLI now produces JSON-consumable output.
- Return ASCII if the given sequences fit, given reasonable confidence.

## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)

### Fixed
- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40)

## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)

### Fixed
- An empty payload given for detection could cause an exception when trying to access the `alphabets` property. (PR #39)

## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)

### Fixed
- The legacy detect function should return UTF-8-SIG if a sig is present in the payload. (PR #38)

## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)

### Changed
- Amend the previous release to allow prettytable 2.0 (PR #35)

## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)

### Fixed
- Fix an error while using the package with a Python pre-release interpreter (PR #33)

### Changed
- Dependencies refactoring, constraints revised.

### Added
- Add Python 3.9 and 3.10 to the supported interpreters

MIT License

Copyright (c) 2025 TAHRI Ahmed R.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
|
||||
|
||||
### Deprecated
|
||||
- Function `normalize` scheduled for removal in 3.0
|
||||
|
||||
### Changed
|
||||
- Removed useless call to decode in fn is_unprintable (#206)
|
||||
|
||||
### Fixed
|
||||
- Third-party library (i18n xgettext) crashing not recognizing utf_8 (PEP 263) with underscore from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)
|
||||
|
||||
## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)
|
||||
|
||||
### Added
|
||||
- Output the Unicode table version when running the CLI with `--version` (PR #194)
|
||||
|
||||
### Changed
|
||||
- Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175)
|
||||
- Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183)
|
||||
|
||||
### Fixed
|
||||
- Workaround potential bug in cpython with Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space (PR #175)
|
||||
- CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)
|
||||
|
||||
### Removed
|
||||
- Support for Python 3.5 (PR #192)
|
||||
|
||||
### Deprecated
|
||||
- Use of backport unicodedata from `unicodedata2` as Python is quickly catching up, scheduled for removal in 3.0 (PR #194)
|
||||
|
||||
## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)
|
||||
|
||||
### Fixed
|
||||
- ASCII miss-detection on rare cases (PR #170)
|
||||
|
||||
## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)
|
||||
|
||||
### Added
|
||||
- Explicit support for Python 3.11 (PR #164)
|
||||
|
||||
### Changed
|
||||
- The logging behavior have been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165)
|
||||
|
||||
## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)
|
||||
|
||||
### Fixed
|
||||
- Fallback match entries might lead to UnicodeDecodeError for large bytes sequence (PR #154)
|
||||
|
||||
### Changed
|
||||
- Skipping the language-detection (CD) on ASCII (PR #155)
|
||||
|
||||
## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)
|
||||
|
||||
### Changed
|
||||
- Moderating the logging impact (since 2.0.8) for specific environments (PR #147)
|
||||
|
||||
### Fixed
|
||||
- Wrong logging level applied when setting kwarg `explain` to True (PR #146)
|
||||
|
||||
## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)
|
||||
### Changed
|
||||
- Improvement over Vietnamese detection (PR #126)
|
||||
- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)
|
||||
- Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122)
|
||||
- call sum() without an intermediary list following PEP 289 recommendations from [@adbar](https://github.com/adbar) (PR #129)
|
||||
- Code style as refactored by Sourcery-AI (PR #131)
|
||||
- Minor adjustment on the MD around european words (PR #133)
|
||||
- Remove and replace SRTs from assets / tests (PR #139)
|
||||
- Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135)
|
||||
- Setting kwarg `explain` to True will add provisionally (bounded to function lifespan) a specific stream handler (PR #135)
|
||||
|
||||
### Fixed
|
||||
- Fix large (misleading) sequence giving UnicodeDecodeError (PR #137)
|
||||
- Avoid using too insignificant chunk (PR #137)
|
||||
|
||||
### Added
|
||||
- Add and expose function `set_logging_handler` to configure a specific StreamHandler from [@nmaynes](https://github.com/nmaynes) (PR #135)
|
||||
- Add `CHANGELOG.md` entries, format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)
|
||||
|
||||
## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)
|
||||
### Added
|
||||
- Add support for Kazakh (Cyrillic) language detection (PR #109)
|
||||
|
||||
### Changed
|
||||
- Further, improve inferring the language from a given single-byte code page (PR #112)
|
||||
- Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116)
|
||||
- Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113)
|
||||
- Various detection improvement (MD+CD) (PR #117)
|
||||
|
||||
### Removed
|
||||
- Remove redundant logging entry about detected language(s) (PR #115)
|
||||
|
||||
### Fixed
|
||||
- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)
|
||||
|
||||
## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)
|
||||
### Fixed
|
||||
- Unforeseen regression with the loss of the backward-compatibility with some older minor of Python 3.5.x (PR #100)
|
||||
- Fix CLI crash when using --minimal output in certain cases (PR #103)
|
||||
|
||||
### Changed
|
||||
- Minor improvement to the detection efficiency (less than 1%) (PR #106 #101)
|
||||
|
||||
## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)
|
||||
### Changed
|
||||
- The project now comply with: flake8, mypy, isort and black to ensure a better overall quality (PR #81)
|
||||
- The BC-support with v1.x was improved, the old staticmethods are restored (PR #82)
|
||||
- The Unicode detection is slightly improved (PR #93)
|
||||
- Add syntax sugar \_\_bool\_\_ for results CharsetMatches list-container (PR #91)
|
||||
|
||||
### Removed
|
||||
- The project no longer raise warning on tiny content given for detection, will be simply logged as warning instead (PR #92)
|
||||
|
||||
### Fixed
|
||||
- In some rare case, the chunks extractor could cut in the middle of a multi-byte character and could mislead the mess detection (PR #95)
|
||||
- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)
|
||||
- The MANIFEST.in was not exhaustive (PR #78)
|
||||
|
||||
## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)
|
||||
### Fixed
|
||||
- The CLI no longer raise an unexpected exception when no encoding has been found (PR #70)
|
||||
- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)
|
||||
- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)
|
||||
- Submatch factoring could be wrong in rare edge cases (PR #72)
|
||||
- Multiple files given to the CLI were ignored when publishing results to STDOUT. (After the first path) (PR #72)
|
||||
- Fix line endings from CRLF to LF for certain project files (PR #67)
|
||||
|
||||
### Changed
|
||||
- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)
|
||||
- Allow fallback on specified encoding if any (PR #71)
|
||||
|
||||
## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)
|
||||
### Changed
|
||||
- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63)
|
||||
- According to the community wishes, the detection will fall back on ASCII or UTF-8 in a last-resort case. (PR #64)
|
||||
|
||||
## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)
|
||||
### Fixed
|
||||
- Empty/Too small JSON payload miss-detection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59)
|
||||
|
||||
### Changed
|
||||
- Don't inject unicodedata2 into sys.modules from [@akx](https://github.com/akx) (PR #57)
|
||||
|
||||
## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)
|
||||
### Fixed
|
||||
- Make it work where there isn't a filesystem available, dropping assets frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)
|
||||
- Using explain=False permanently disable the verbose output in the current runtime (PR #47)
|
||||
- One log entry (language target preemptive) was not show in logs when using explain=True (PR #47)
|
||||
- Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52)
|
||||
|
||||
### Changed
|
||||
- Public function normalize default args values were not aligned with from_bytes (PR #53)
|
||||
|
||||
### Added
|
||||
- You may now use charset aliases in cp_isolation and cp_exclusion arguments (PR #47)
|
||||
|
||||
## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)
|
||||
### Changed
|
||||
- 4x to 5 times faster than the previous 1.4.0 release. At least 2x faster than Chardet.
|
||||
- Accent has been made on UTF-8 detection, should perform rather instantaneous.
|
||||
- The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible.
|
||||
- The detection mechanism has been slightly improved, now Turkish content is detected correctly (most of the time)
|
||||
- The program has been rewritten to ease the readability and maintainability. (+Using static typing)+
|
||||
- utf_7 detection has been reinstated.
|
||||
|
||||
### Removed
|
||||
- This package no longer require anything when used with Python 3.5 (Dropped cached_property)
|
||||
- Removed support for these languages: Catalan, Esperanto, Kazakh, Baque, Volapük, Azeri, Galician, Nynorsk, Macedonian, and Serbocroatian.
|
||||
- The exception hook on UnicodeDecodeError has been removed.
|
||||
|
||||
### Deprecated
|
||||
- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0
|
||||
|
||||
### Fixed
|
||||
- The CLI output used the relative path of the file(s). Should be absolute.
|
||||
|
||||
## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)
|
||||
### Fixed
|
||||
- Logger configuration/usage no longer conflict with others (PR #44)
|
||||
|
||||
## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)
|
||||
### Removed
|
||||
- Using standard logging instead of using the package loguru.
|
||||
- Dropping nose test framework in favor of the maintained pytest.
|
||||
- Choose to not use dragonmapper package to help with gibberish Chinese/CJK text.
|
||||
- Require cached_property only for Python 3.5 due to constraint. Dropping for every other interpreter version.
|
||||
- Stop support for UTF-7 that does not contain a SIG.
|
||||
- Dropping PrettyTable, replaced with pure JSON output in CLI.
|
||||
|
||||
### Fixed
|
||||
- BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present. Due to the sub-match factoring process.
|
||||
- Not searching properly for the BOM when trying utf32/16 parent codec.
|
||||
|
||||
### Changed
|
||||
- Improving the package final size by compressing frequencies.json.
|
||||
- Huge improvement over the larges payload.
|
||||
|
||||
### Added
|
||||
- CLI now produces JSON consumable output.
|
||||
- Return ASCII if given sequences fit. Given reasonable confidence.
|
||||
|
||||
## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)
|
||||
|
||||
### Fixed
|
||||
- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40)
|
||||
|
||||
## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)
|
||||
|
||||
### Fixed
|
||||
- Empty given payload for detection may cause an exception if trying to access the `alphabets` property. (PR #39)
|
||||
|
||||
## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)
|
||||
|
||||
### Fixed
|
||||
- The legacy detect function should return UTF-8-SIG if sig is present in the payload. (PR #38)
|
||||
|
||||
## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)
|
||||
|
||||
### Changed
|
||||
- Amend the previous release to allow prettytable 2.0 (PR #35)
|
||||
|
||||
## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)
|
||||
|
||||
### Fixed
|
||||
- Fix error while using the package with a python pre-release interpreter (PR #33)
|
||||
|
||||
### Changed
|
||||
- Dependencies refactoring, constraints revised.
|
||||
|
||||
### Added
|
||||
- Add python 3.9 and 3.10 to the supported interpreters
|
||||
|
||||

MIT License

Copyright (c) 2025 TAHRI Ahmed R.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

Binary file not shown.

@@ -1,764 +0,0 @@

Metadata-Version: 2.4
Name: charset-normalizer
Version: 3.4.4
Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.
Author-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
Maintainer-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
License: MIT
Project-URL: Changelog, https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md
Project-URL: Documentation, https://charset-normalizer.readthedocs.io/
Project-URL: Code, https://github.com/jawah/charset_normalizer
Project-URL: Issue tracker, https://github.com/jawah/charset_normalizer/issues
Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Text Processing :: Linguistic
Classifier: Topic :: Utilities
Classifier: Typing :: Typed
Requires-Python: >=3.7
Description-Content-Type: text/markdown
License-File: LICENSE
Provides-Extra: unicode-backport
Dynamic: license-file

<h1 align="center">Charset Detection, for Everyone 👋</h1>

<p align="center">
<sup>The Real First Universal Charset Detector</sup><br>
<a href="https://pypi.org/project/charset-normalizer">
<img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
</a>
<a href="https://pepy.tech/project/charset-normalizer/">
<img alt="Download Count Total" src="https://static.pepy.tech/badge/charset-normalizer/month" />
</a>
<a href="https://bestpractices.coreinfrastructure.org/projects/7297">
<img src="https://bestpractices.coreinfrastructure.org/projects/7297/badge">
</a>
</p>
<p align="center">
<sup><i>Featured Packages</i></sup><br>
<a href="https://github.com/jawah/niquests">
<img alt="Static Badge" src="https://img.shields.io/badge/Niquests-Most_Advanced_HTTP_Client-cyan">
</a>
<a href="https://github.com/jawah/wassima">
<img alt="Static Badge" src="https://img.shields.io/badge/Wassima-Certifi_Replacement-cyan">
</a>
</p>
<p align="center">
<sup><i>In other languages (unofficial ports - by the community)</i></sup><br>
<a href="https://github.com/nickspring/charset-normalizer-rs">
<img alt="Static Badge" src="https://img.shields.io/badge/Rust-red">
</a>
</p>

> A library that helps you read text from an unknown charset encoding.<br /> Motivated by `chardet`,
> I'm trying to resolve the issue by taking a new approach.
> All IANA character set names for which the Python core library provides codecs are supported.

<p align="center">
>>>>> <a href="https://charsetnormalizerweb.ousret.now.sh" target="_blank">👉 Try Me Online Now, Then Adopt Me 👈 </a> <<<<<
</p>

This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.

| Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |
|--------------------------------------------------|:---------------------------------------------:|:------------------:|:-----------------------------------------------:|
| `Fast` | ❌ | ✅ | ✅ |
| `Universal**` | ❌ | ✅ | ❌ |
| `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ |
| `Reliable` **with** distinguishable standards | ✅ | ✅ | ✅ |
| `License` | LGPL-2.1<br>_restrictive_ | MIT | MPL-1.1<br>_restrictive_ |
| `Native Python` | ✅ | ✅ | ❌ |
| `Detect spoken language` | ❌ | ✅ | N/A |
| `UnicodeDecodeError Safety` | ❌ | ✅ | ❌ |
| `Whl Size (min)` | 193.6 kB | 42 kB | ~200 kB |
| `Supported Encoding` | 33 | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |

<p align="center">
<img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>
</p>

*\*\*: They are clearly using specific code for a specific encoding even if covering most of the used ones*

## ⚡ Performance

This package offers better performance than its counterpart Chardet. Here are some numbers.

| Package | Accuracy | Mean per file (ms) | File per sec (est) |
|-----------------------------------------------|:--------:|:------------------:|:------------------:|
| [chardet](https://github.com/chardet/chardet) | 86 % | 63 ms | 16 file/sec |
| charset-normalizer | **98 %** | **10 ms** | 100 file/sec |

| Package | 99th percentile | 95th percentile | 50th percentile |
|-----------------------------------------------|:---------------:|:---------------:|:---------------:|
| [chardet](https://github.com/chardet/chardet) | 265 ms | 71 ms | 7 ms |
| charset-normalizer | 100 ms | 50 ms | 5 ms |

_updated as of December 2024 using CPython 3.12_

Chardet's performance on larger files (1MB+) is very poor. Expect a huge difference on large payloads.

> Stats are generated using 400+ files using default parameters. For more details on the files used, see the GHA workflows.
> And yes, these results might change at any time. The dataset can be updated to include more files.
> The actual delays heavily depend on your CPU capabilities. The factors should remain the same.
> Keep in mind that the stats are generous and that Chardet's accuracy vs ours is measured using Chardet's initial capability
> (e.g. supported encodings). Challenge them if you want.

## ✨ Installation

Using pip:

```sh
pip install charset-normalizer -U
```
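
To quickly confirm that the installation worked, here is a minimal sanity check (it assumes the active interpreter is the one pip installed into):

```python
# Minimal post-install sanity check.
import charset_normalizer

print(charset_normalizer.__version__)
```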

## 🚀 Basic Usage

### CLI

This package comes with a CLI.

```
usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
                  file [file ...]

The Real First Universal Charset Detector. Discover originating encoding used
on text file. Normalize text to unicode.

positional arguments:
  files                 File(s) to be analysed

optional arguments:
  -h, --help            show this help message and exit
  -v, --verbose         Display complementary information about file if any.
                        Stdout will contain logs about the detection process.
  -a, --with-alternative
                        Output complementary possibilities if any. Top-level
                        JSON WILL be a list.
  -n, --normalize       Permit to normalize input file. If not set, program
                        does not write anything.
  -m, --minimal         Only output the charset detected to STDOUT. Disabling
                        JSON output.
  -r, --replace         Replace file when trying to normalize it instead of
                        creating a new one.
  -f, --force           Replace file without asking if you are sure, use this
                        flag with caution.
  -t THRESHOLD, --threshold THRESHOLD
                        Define a custom maximum amount of chaos allowed in
                        decoded content. 0. <= chaos <= 1.
  --version             Show version information and exit.
```

```bash
normalizer ./data/sample.1.fr.srt
```

or

```bash
python -m charset_normalizer ./data/sample.1.fr.srt
```

🎉 Since version 1.4.0 the CLI produces an easily usable stdout result in JSON format.

```json
{
    "path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
    "encoding": "cp1252",
    "encoding_aliases": [
        "1252",
        "windows_1252"
    ],
    "alternative_encodings": [
        "cp1254",
        "cp1256",
        "cp1258",
        "iso8859_14",
        "iso8859_15",
        "iso8859_16",
        "iso8859_3",
        "iso8859_9",
        "latin_1",
        "mbcs"
    ],
    "language": "French",
    "alphabets": [
        "Basic Latin",
        "Latin-1 Supplement"
    ],
    "has_sig_or_bom": false,
    "chaos": 0.149,
    "coherence": 97.152,
    "unicode_path": null,
    "is_preferred": true
}
```
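
Since the report goes to STDOUT, it is easy to consume from a script. A small sketch using only the standard library (the sample path is illustrative and must exist on your machine):

```python
# Run the CLI and parse its JSON report; the file path is illustrative.
import json
import subprocess

completed = subprocess.run(
    ["normalizer", "./data/sample.1.fr.srt"],
    capture_output=True,
    text=True,
    check=True,
)

report = json.loads(completed.stdout)
print(report["encoding"], report["language"], report["is_preferred"])
```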

### Python

*Just print out normalized text*

```python
from charset_normalizer import from_path

results = from_path('./my_subtitle.srt')

print(str(results.best()))
```
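
Detection also works on raw bytes through `from_bytes`; a minimal sketch (the payload below is fabricated for illustration, and an equivalent codec may win over cp1252):

```python
from charset_normalizer import from_bytes

# Fabricated payload: French text encoded with a legacy single-byte codec.
payload = "Bonjour, ça va très bien ?".encode("cp1252")

best_guess = from_bytes(payload).best()

print(best_guess.encoding)  # e.g. "cp1252" or an equivalent codec
print(str(best_guess))      # the decoded, readable text
```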

*Upgrade your code without effort*

```python
from charset_normalizer import detect
```

The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.
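
For instance, a sketch of a drop-in call (the sample string is made up; the exact language and confidence values may differ):

```python
from charset_normalizer import detect

# The returned dict mimics chardet's: 'encoding', 'language', 'confidence'.
result = detect("Très bien, merci !".encode("utf-8-sig"))
print(result)  # e.g. {'encoding': 'UTF-8-SIG', ...}
```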

See the docs for advanced usage: [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)

## 😇 Why

When I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a
reliable alternative using a completely different method. Also! I never back down from a good challenge!

I **don't care** about the **originating charset** encoding, because **two different tables** can
produce **two identical rendered strings.**
What I want is to get readable text, the best I can.

In a way, **I'm brute-forcing text decoding.** How cool is that? 😎

Don't confuse the package **ftfy** with charset-normalizer or chardet. ftfy's goal is to repair Unicode strings, whereas charset-normalizer's is to convert a raw file in an unknown encoding to Unicode.

## 🍰 How

- Discard all charset encoding tables that could not fit the binary content.
- Measure the noise, or the mess, once opened (by chunks) with a corresponding charset encoding.
- Extract the matches with the lowest mess detected.
- Additionally, we measure coherence / probe for a language (a toy sketch of the whole pipeline follows this list).
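
A toy sketch of that pipeline, assuming a hand-picked candidate list and a deliberately naive noise measure (the real detector tries far more codecs and combines many weighted probes):

```python
# A toy sketch of the pipeline above -- NOT the library's real implementation.
CANDIDATES = ("utf_8", "utf_16", "cp1252", "latin_1")

def mess_ratio(text: str) -> float:
    # Naive noise measure: fraction of unprintable characters.
    suspicious = sum(1 for ch in text if not ch.isprintable() and ch not in "\r\n\t")
    return suspicious / max(len(text), 1)

def guess(payload: bytes) -> list[tuple[str, float]]:
    scored = []
    for encoding in CANDIDATES:
        try:
            decoded = payload.decode(encoding)  # discard tables that cannot fit
        except UnicodeDecodeError:
            continue
        scored.append((encoding, mess_ratio(decoded)))  # measure the mess
    return sorted(scored, key=lambda item: item[1])  # lowest mess first

print(guess("Comment ça va ?".encode("utf_16")))
# utf_16 scores ~0.0 here; the single-byte codecs score high due to NUL bytes.
```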

**Wait a minute**, what is noise/mess and coherence according to **YOU?**

*Noise:* I opened hundreds of text files, **written by humans**, with the wrong encoding table. **I observed**, then
**I established** some ground rules about **what is obvious** when **it seems like** a mess (aka. defining noise in rendered text).
I know that my interpretation of noise is probably incomplete; feel free to contribute in order to
improve or rewrite it.

*Coherence:* For each language on earth, we have computed ranked letter appearance occurrences (as best we can). I figured
that intel was worth something here, so I use those records against the decoded text to check whether I can detect intelligent design.
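
A toy illustration of that coherence probe, with a made-up reference ranking (the real per-language tables, such as FREQUENCIES, are far more complete):

```python
# Toy coherence probe: compare the most frequent letters of the decoded
# text against a per-language reference ranking (made up for illustration).
from collections import Counter

FRENCH_TOP = ["e", "s", "a", "i", "t", "n", "r", "u", "o", "l"]

def coherence(text: str, reference: list[str]) -> float:
    letters = [ch for ch in text.lower() if ch.isalpha()]
    observed = [letter for letter, _ in Counter(letters).most_common(len(reference))]
    hits = sum(1 for letter in observed if letter in reference)
    return hits / len(reference)

print(coherence("Le chat dort tranquillement sur le canapé.", FRENCH_TOP))
```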

## ⚡ Known limitations

- Language detection is unreliable when the text contains two or more languages sharing identical letters. (e.g. HTML (English tags) + Turkish content (sharing Latin characters))
- Every charset detector heavily depends on sufficient content. In common cases, do not bother running detection on very tiny content.

## ⚠️ About Python EOLs

**If you are running:**

- Python >=2.7,<3.5: Unsupported
- Python 3.5: charset-normalizer < 2.1
- Python 3.6: charset-normalizer < 3.1
- Python 3.7: charset-normalizer < 4.0

Upgrade your Python interpreter as soon as possible.

## 👤 Contributing

Contributions, issues and feature requests are very much welcome.<br />
Feel free to check the [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.

## 📝 License

Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).<br />
This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.

Characters frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/)

## 💼 For Enterprise

Professional support for charset-normalizer is available as part of the [Tidelift
Subscription][1]. Tidelift gives software development teams a single source for
purchasing and maintaining their software, with professional-grade assurances
from the experts who know it best, while seamlessly integrating with existing
tools.

[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme

[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/7297/badge)](https://www.bestpractices.dev/projects/7297)
Binary file not shown.

@ -1,764 +0,0 @@
Metadata-Version: 2.4
Name: charset-normalizer
Version: 3.4.4
Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.
Author-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
Maintainer-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
License: MIT
Project-URL: Changelog, https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md
Project-URL: Documentation, https://charset-normalizer.readthedocs.io/
Project-URL: Code, https://github.com/jawah/charset_normalizer
Project-URL: Issue tracker, https://github.com/jawah/charset_normalizer/issues
Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Text Processing :: Linguistic
Classifier: Topic :: Utilities
Classifier: Typing :: Typed
Requires-Python: >=3.7
Description-Content-Type: text/markdown
License-File: LICENSE
Provides-Extra: unicode-backport
Dynamic: license-file

<h1 align="center">Charset Detection, for Everyone 👋</h1>

<p align="center">
  <sup>The Real First Universal Charset Detector</sup><br>
  <a href="https://pypi.org/project/charset-normalizer">
    <img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
  </a>
  <a href="https://pepy.tech/project/charset-normalizer/">
    <img alt="Download Count Total" src="https://static.pepy.tech/badge/charset-normalizer/month" />
  </a>
  <a href="https://bestpractices.coreinfrastructure.org/projects/7297">
    <img src="https://bestpractices.coreinfrastructure.org/projects/7297/badge">
  </a>
</p>
<p align="center">
  <sup><i>Featured Packages</i></sup><br>
  <a href="https://github.com/jawah/niquests">
    <img alt="Static Badge" src="https://img.shields.io/badge/Niquests-Most_Advanced_HTTP_Client-cyan">
  </a>
  <a href="https://github.com/jawah/wassima">
    <img alt="Static Badge" src="https://img.shields.io/badge/Wassima-Certifi_Replacement-cyan">
  </a>
</p>
<p align="center">
  <sup><i>In other languages (unofficial ports - by the community)</i></sup><br>
  <a href="https://github.com/nickspring/charset-normalizer-rs">
    <img alt="Static Badge" src="https://img.shields.io/badge/Rust-red">
  </a>
</p>

> A library that helps you read text from an unknown charset encoding.<br /> Motivated by `chardet`,
> I'm trying to resolve the issue by taking a new approach.
> All IANA character set names for which the Python core library provides codecs are supported.

<p align="center">
  >>>>> <a href="https://charsetnormalizerweb.ousret.now.sh" target="_blank">👉 Try Me Online Now, Then Adopt Me 👈 </a> <<<<<
</p>

This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.

| Feature                                           | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |
|---------------------------------------------------|:---------------------------------------------:|:------------------:|:-----------------------------------------------:|
| `Fast`                                            | ❌ | ✅ | ✅ |
| `Universal**`                                     | ❌ | ✅ | ❌ |
| `Reliable` **without** distinguishable standards  | ❌ | ✅ | ✅ |
| `Reliable` **with** distinguishable standards     | ✅ | ✅ | ✅ |
| `License`                                         | LGPL-2.1<br>_restrictive_ | MIT | MPL-1.1<br>_restrictive_ |
| `Native Python`                                   | ✅ | ✅ | ❌ |
| `Detect spoken language`                          | ❌ | ✅ | N/A |
| `UnicodeDecodeError Safety`                       | ❌ | ✅ | ❌ |
| `Whl Size (min)`                                  | 193.6 kB | 42 kB | ~200 kB |
| `Supported Encoding`                              | 33 | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |

<p align="center">
<img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>
</p>

*\*\* : They are clearly using specific code for a specific encoding even if covering most of the used ones*<br>
## ⚡ Performance

This package offers better performance than its counterpart, Chardet. Here are some numbers.

| Package                                       | Accuracy | Mean per file (ms) | File per sec (est) |
|-----------------------------------------------|:--------:|:------------------:|:------------------:|
| [chardet](https://github.com/chardet/chardet) | 86 %     | 63 ms              | 16 file/sec        |
| charset-normalizer                            | **98 %** | **10 ms**          | 100 file/sec       |

| Package                                       | 99th percentile | 95th percentile | 50th percentile |
|-----------------------------------------------|:---------------:|:---------------:|:---------------:|
| [chardet](https://github.com/chardet/chardet) | 265 ms          | 71 ms           | 7 ms            |
| charset-normalizer                            | 100 ms          | 50 ms           | 5 ms            |

_updated as of December 2024 using CPython 3.12_

Chardet's performance on larger files (1MB+) is very poor. Expect a huge difference on large payloads.

> Stats are generated using 400+ files using default parameters. For more details on the files used, see the GHA workflows.
> And yes, these results might change at any time. The dataset can be updated to include more files.
> The actual delays heavily depend on your CPU capabilities. The factors should remain the same.
> Keep in mind that the stats are generous and that Chardet accuracy vs ours is measured using Chardet's initial capability
> (e.g. Supported Encoding). Challenge them if you want.
## ✨ Installation

Using pip:

```sh
pip install charset-normalizer -U
```

## 🚀 Basic Usage

### CLI
This package comes with a CLI.

```
usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
                  file [file ...]

The Real First Universal Charset Detector. Discover originating encoding used
on text file. Normalize text to unicode.

positional arguments:
  files                 File(s) to be analysed

optional arguments:
  -h, --help            show this help message and exit
  -v, --verbose         Display complementary information about file if any.
                        Stdout will contain logs about the detection process.
  -a, --with-alternative
                        Output complementary possibilities if any. Top-level
                        JSON WILL be a list.
  -n, --normalize       Permit to normalize input file. If not set, program
                        does not write anything.
  -m, --minimal         Only output the charset detected to STDOUT. Disabling
                        JSON output.
  -r, --replace         Replace file when trying to normalize it instead of
                        creating a new one.
  -f, --force           Replace file without asking if you are sure, use this
                        flag with caution.
  -t THRESHOLD, --threshold THRESHOLD
                        Define a custom maximum amount of chaos allowed in
                        decoded content. 0. <= chaos <= 1.
  --version             Show version information and exit.
```

```bash
normalizer ./data/sample.1.fr.srt
```

or

```bash
python -m charset_normalizer ./data/sample.1.fr.srt
```

🎉 Since version 1.4.0 the CLI produces an easily consumable stdout result in JSON format.

```json
{
    "path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
    "encoding": "cp1252",
    "encoding_aliases": [
        "1252",
        "windows_1252"
    ],
    "alternative_encodings": [
        "cp1254",
        "cp1256",
        "cp1258",
        "iso8859_14",
        "iso8859_15",
        "iso8859_16",
        "iso8859_3",
        "iso8859_9",
        "latin_1",
        "mbcs"
    ],
    "language": "French",
    "alphabets": [
        "Basic Latin",
        "Latin-1 Supplement"
    ],
    "has_sig_or_bom": false,
    "chaos": 0.149,
    "coherence": 97.152,
    "unicode_path": null,
    "is_preferred": true
}
```
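
Because the top-level stdout result is plain JSON, it can be consumed directly from another program. Here is a minimal sketch, assuming the `normalizer` entry point is on your PATH and that `./data/sample.1.fr.srt` exists:

```python
import json
import subprocess

# Run the CLI and capture the JSON report it writes to stdout.
completed = subprocess.run(
    ["normalizer", "./data/sample.1.fr.srt"],
    capture_output=True,
    text=True,
    check=True,
)

report = json.loads(completed.stdout)
print(report["encoding"], report["language"], report["is_preferred"])
```

With `-a`/`--with-alternative` the top-level JSON is a list instead of a single object, so adapt the parsing accordingly.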

### Python
*Just print out normalized text*

```python
from charset_normalizer import from_path

results = from_path('./my_subtitle.srt')

print(str(results.best()))
```

*Upgrade your code without effort*

```python
from charset_normalizer import detect
```

The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.

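As an illustration of that drop-in behavior, a minimal sketch of the legacy `detect` call follows; the byte payload is an arbitrary example:

```python
from charset_normalizer import detect

# Same result shape as chardet: a dict exposing encoding, language and confidence.
result = detect("Déjà vu, café et crème brûlée.".encode("cp1252"))
print(result["encoding"], result["confidence"])
```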

See the docs for advanced usage: [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)

## 😇 Why

When I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a
reliable alternative using a completely different method. Also! I never back down on a good challenge!

I **don't care** about the **originating charset** encoding, because **two different tables** can
produce **two identical rendered strings.**
What I want is to get readable text, the best I can.

In a way, **I'm brute forcing text decoding.** How cool is that? 😎

Don't confuse the package **ftfy** with charset-normalizer or chardet. ftfy's goal is to repair broken Unicode strings, whereas charset-normalizer converts raw files in unknown encodings to Unicode.

## 🍰 How

- Discard all charset encoding tables that could not fit the binary content.
- Measure the noise, or the mess, once opened (by chunks) with a corresponding charset encoding.
- Extract the matches with the lowest mess detected.
- Additionally, we measure coherence / probe for a language.

**Wait a minute**, what is noise/mess and coherence according to **YOU?**

*Noise:* I opened hundreds of text files, **written by humans**, with the wrong encoding table. **I observed**, then
**I established** some ground rules about **what is obvious** when **it seems like** a mess (aka. defining noise in rendered text).
I know that my interpretation of what is noise is probably incomplete; feel free to contribute in order to
improve or rewrite it.

*Coherence:* For each language on earth, we have computed ranked letter appearance occurrences (the best we can). So I thought
that intel is worth something here. So I use those records against decoded text to check if I can detect intelligent design.
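
The mess and coherence measurements described above are exposed on every match, so you can inspect them yourself. A minimal sketch, using an arbitrary byte payload:

```python
from charset_normalizer import from_bytes

payload = "Всеки човек има право на образование.".encode("cp1251")

# from_bytes runs the full pipeline: table elimination, mess measurement,
# then coherence / language probing on the surviving candidates.
best = from_bytes(payload).best()

if best is not None:
    print(best.encoding)   # winning codec
    print(best.chaos)      # mess ratio, lower is better
    print(best.coherence)  # language coherence ratio, higher is better
    print(best.language)   # most probable language
```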

## ⚡ Known limitations

- Language detection is unreliable when the text contains two or more languages sharing identical letters. (e.g. HTML (English tags) + Turkish content (sharing Latin characters))
- Every charset detector heavily depends on sufficient content. In common cases, do not bother running detection on very tiny content.

## ⚠️ About Python EOLs

**If you are running:**

- Python >=2.7,<3.5: Unsupported
- Python 3.5: charset-normalizer < 2.1
- Python 3.6: charset-normalizer < 3.1
- Python 3.7: charset-normalizer < 4.0

Upgrade your Python interpreter as soon as possible.

## 👤 Contributing

Contributions, issues and feature requests are very much welcome.<br />
Feel free to check the [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.

## 📝 License

Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).<br />
This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.

Characters frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/)

## 💼 For Enterprise

Professional support for charset-normalizer is available as part of the [Tidelift
Subscription][1]. Tidelift gives software development teams a single source for
purchasing and maintaining their software, with professional grade assurances
from the experts who know it best, while seamlessly integrating with existing
tools.

[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme

[](https://www.bestpractices.dev/projects/7297)

# Changelog
All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## [3.4.4](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.4) (2025-10-13)

### Changed
- Bound `setuptools` to a specific constraint `setuptools>=68,<=81`.
- Raised the upper bound of mypyc for the optional pre-built extension to v1.18.2

### Removed
- `setuptools-scm` as a build dependency.

### Misc
- Enforced hashes in `dev-requirements.txt` and created `ci-requirements.txt` for security purposes.
- Additional pre-built wheels for riscv64, s390x, and armv7l architectures.
- Restore `multiple.intoto.jsonl` in GitHub releases in addition to the individual attestation file per wheel.
## [3.4.3](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.3) (2025-08-09)

### Changed
- mypy(c) is no longer a required dependency at build time if `CHARSET_NORMALIZER_USE_MYPYC` isn't set to `1`. (#595) (#583)
- Automatically lower the confidence on small bytes samples that are not Unicode in the `detect` legacy function output. (#391)

### Added
- Custom build backend to overcome the inability to mark mypy as an optional dependency in the build phase.
- Support for Python 3.14

### Fixed
- sdist archive contained useless directories.
- Automatically fall back on valid UTF-16 or UTF-32 even if the md says it's noisy. (#633)

### Misc
- SBOMs are automatically published to the relevant GitHub release to comply with regulatory changes.
  Each published wheel comes with its SBOM. We chose CycloneDX as the format.
- Prebuilt optimized wheels are no longer distributed by default for CPython 3.7 due to a change in cibuildwheel.
## [3.4.2](https://github.com/Ousret/charset_normalizer/compare/3.4.1...3.4.2) (2025-05-02)

### Fixed
- Addressed the DeprecationWarning in our CLI regarding `argparse.FileType` by backporting the target class into the package. (#591)
- Improved the overall reliability of the detector with CJK Ideographs. (#605) (#587)

### Changed
- Optional mypyc compilation upgraded to version 1.15 for Python >= 3.8
## [3.4.1](https://github.com/Ousret/charset_normalizer/compare/3.4.0...3.4.1) (2024-12-24)

### Changed
- Project metadata are now stored using `pyproject.toml` instead of `setup.cfg` using setuptools as the build backend.
- Enforce annotation delayed loading for simpler and consistent types in the project.
- Optional mypyc compilation upgraded to version 1.14 for Python >= 3.8

### Added
- pre-commit configuration.
- noxfile.

### Removed
- `build-requirements.txt` as per using `pyproject.toml` native build configuration.
- `bin/integration.py` and `bin/serve.py` in favor of downstream integration tests (see noxfile).
- `setup.cfg` in favor of `pyproject.toml` metadata configuration.
- Unused `utils.range_scan` function.

### Fixed
- Converting content to Unicode bytes may insert `utf_8` instead of the preferred `utf-8`. (#572)
- Deprecation warning "'count' is passed as positional argument" when converting to Unicode bytes on Python 3.13+
## [3.4.0](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) (2024-10-08)

### Added
- Argument `--no-preemptive` in the CLI to prevent the detector from searching for hints.
- Support for Python 3.13 (#512)

### Fixed
- Relax the TypeError exception thrown when trying to compare a CharsetMatch with anything else than a CharsetMatch.
- Improved the general reliability of the detector based on user feedback. (#520) (#509) (#498) (#407) (#537)
- Declared charset in content (preemptive detection) not changed when converting to utf-8 bytes. (#381)
## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31)

### Fixed
- Unintentional memory usage regression when using a large payload that matches several encodings (#376)
- Regression on some detection cases showcased in the documentation (#371)

### Added
- Noise (md) probe that identifies malformed Arabic representation due to the presence of letters in isolated form (credit to my wife)

## [3.3.1](https://github.com/Ousret/charset_normalizer/compare/3.3.0...3.3.1) (2023-10-22)

### Changed
- Optional mypyc compilation upgraded to version 1.6.1 for Python >= 3.8
- Improved the general detection reliability based on reports from the community
## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30)

### Added
- Allow executing the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer`
- Support for 9 forgotten encodings that are supported by Python but unlisted in `encoding.aliases` as they have no alias (#323)

### Removed
- (internal) Redundant utils.is_ascii function and unused function is_private_use_only
- (internal) charset_normalizer.assets is moved inside charset_normalizer.constant

### Changed
- (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection
- Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.8

### Fixed
- Unable to properly sort CharsetMatch when both chaos/noise and coherence were close due to an unreachable condition in \_\_lt\_\_ (#350)
## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)

### Changed
- Typehint for function `from_path` no longer enforces `PathLike` as its first argument
- Minor improvement over the global detection reliability

### Added
- Introduce function `is_binary` that relies on main capabilities, and is optimized to detect binaries
- Propagate the `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp`, allowing deeper control over the detection (default True)
- Explicit support for Python 3.12

### Fixed
- Edge case detection failure where a file would contain a 'very-long' camel cased word (Issue #289)
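
As an aside, the `is_binary` helper introduced in this release is straightforward to exercise; a minimal sketch with arbitrary payloads:

```python
from charset_normalizer import is_binary

# True for content that does not decode into meaningful text.
print(is_binary(b"\x00\x01\x02\xff\xfe\x00\x00"))

# False for a plain text payload.
print(is_binary("hello world".encode("utf-8")))
```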

## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)

### Added
- Argument `should_rename_legacy` for the legacy function `detect`, and disregard any new arguments without errors (PR #262)

### Removed
- Support for Python 3.6 (PR #260)

### Changed
- Optional speedup provided by mypy/c 1.0.1

## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)

### Fixed
- Multi-bytes cutter/chunk generator did not always cut correctly (PR #233)

### Changed
- Speedup provided by mypy/c 0.990 on Python >= 3.7
## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)

### Added
- Extend the capability of explain=True when cp_isolation contains at most two entries (min one); will log the details of the Mess-detector results
- Support for alternative language frequency sets in charset_normalizer.assets.FREQUENCIES
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio (see the sketch after this list)
- `normalizer --version` now specifies if the current version provides extra speedup (meaning mypyc compilation whl)
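
A minimal sketch of that `language_threshold` parameter; the payload and the chosen value are arbitrary examples:

```python
from charset_normalizer import from_bytes

payload = "Überraschung! Die Prüfung war gar nicht so schwer.".encode("cp1252")

# A higher language_threshold demands a higher coherence ratio before a
# language is associated with a match (the default is 0.1).
matches = from_bytes(payload, language_threshold=0.3)
print(matches.best())
```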

### Changed
- Build with static metadata using the 'build' frontend
- Make the language detection stricter
- Optional: Module `md.py` can be compiled using mypyc to provide an extra speedup, up to 4x faster than v2.1

### Fixed
- CLI with opt --normalize fails when using the full path for files
- TooManyAccentuatedPlugin induces false positives on the mess detection when too few alpha characters have been fed to it
- Sphinx warnings when generating the documentation

### Removed
- Coherence detector no longer returns 'Simple English'; instead returns 'English'
- Coherence detector no longer returns 'Classical Chinese'; instead returns 'Chinese'
- Breaking: Methods `first()` and `best()` from CharsetMatch
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (it is unreliable/conflicts with ASCII)
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
- Breaking: Top-level function `normalize`
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
- Support for the backport `unicodedata2`
## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)

### Added
- Extend the capability of explain=True when cp_isolation contains at most two entries (min one); will log the details of the Mess-detector results
- Support for alternative language frequency sets in charset_normalizer.assets.FREQUENCIES
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio

### Changed
- Build with static metadata using the 'build' frontend
- Make the language detection stricter

### Fixed
- CLI with opt --normalize fails when using the full path for files
- TooManyAccentuatedPlugin induces false positives on the mess detection when too few alpha characters have been fed to it

### Removed
- Coherence detector no longer returns 'Simple English'; instead returns 'English'
- Coherence detector no longer returns 'Classical Chinese'; instead returns 'Chinese'

## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)

### Added
- `normalizer --version` now specifies if the current version provides extra speedup (meaning mypyc compilation whl)

### Removed
- Breaking: Methods `first()` and `best()` from CharsetMatch
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (it is unreliable/conflicts with ASCII)

### Fixed
- Sphinx warnings when generating the documentation

## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)

### Changed
- Optional: Module `md.py` can be compiled using mypyc to provide an extra speedup, up to 4x faster than v2.1

### Removed
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
- Breaking: Top-level function `normalize`
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
- Support for the backport `unicodedata2`
## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)

### Deprecated
- Function `normalize` scheduled for removal in 3.0

### Changed
- Removed a useless call to decode in fn is_unprintable (#206)

### Fixed
- Third-party library (i18n xgettext) crashing by not recognizing utf_8 (PEP 263) with underscore, from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)

## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)

### Added
- Output the Unicode table version when running the CLI with `--version` (PR #194)

### Changed
- Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175)
- Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183)

### Fixed
- Workaround potential bug in cpython with Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space (PR #175)
- CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)

### Removed
- Support for Python 3.5 (PR #192)

### Deprecated
- Use of backport unicodedata from `unicodedata2` as Python is quickly catching up; scheduled for removal in 3.0 (PR #194)
## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)

### Fixed
- ASCII mis-detection in rare cases (PR #170)

## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)

### Added
- Explicit support for Python 3.11 (PR #164)

### Changed
- The logging behavior has been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165)

## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)

### Fixed
- Fallback match entries might lead to UnicodeDecodeError for large bytes sequences (PR #154)

### Changed
- Skipping the language-detection (CD) on ASCII (PR #155)
## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)

### Changed
- Moderating the logging impact (since 2.0.8) for specific environments (PR #147)

### Fixed
- Wrong logging level applied when setting kwarg `explain` to True (PR #146)

## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)

### Changed
- Improvement over Vietnamese detection (PR #126)
- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)
- Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122)
- Call sum() without an intermediary list following PEP 289 recommendations from [@adbar](https://github.com/adbar) (PR #129)
- Code style as refactored by Sourcery-AI (PR #131)
- Minor adjustment on the MD around European words (PR #133)
- Remove and replace SRTs from assets / tests (PR #139)
- Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135)
- Setting kwarg `explain` to True will provisionally add (bound to the function lifespan) a specific stream handler (PR #135)

### Fixed
- Fix large (misleading) sequence giving UnicodeDecodeError (PR #137)
- Avoid using too insignificant chunks (PR #137)

### Added
- Add and expose function `set_logging_handler` to configure a specific StreamHandler from [@nmaynes](https://github.com/nmaynes) (PR #135)
- Add `CHANGELOG.md` entries; format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)
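
A minimal sketch of that `set_logging_handler` helper, assuming its default handler name and format; the payload is an arbitrary example:

```python
import logging

from charset_normalizer import from_bytes, set_logging_handler

# Attach a StreamHandler to the package logger to watch the detection steps.
set_logging_handler(level=logging.DEBUG)

from_bytes("Comment ça va ?".encode("cp1252"))
```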
## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)

### Added
- Add support for Kazakh (Cyrillic) language detection (PR #109)

### Changed
- Further improve inferring the language from a given single-byte code page (PR #112)
- Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116)
- Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113)
- Various detection improvements (MD+CD) (PR #117)

### Removed
- Remove redundant logging entry about detected language(s) (PR #115)

### Fixed
- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)
## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)

### Fixed
- Unforeseen regression with the loss of backward compatibility with some older minor versions of Python 3.5.x (PR #100)
- Fix CLI crash when using --minimal output in certain cases (PR #103)

### Changed
- Minor improvement to the detection efficiency (less than 1%) (PR #106 #101)

## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)

### Changed
- The project now complies with flake8, mypy, isort and black to ensure a better overall quality (PR #81)
- The BC support with v1.x was improved; the old staticmethods are restored (PR #82)
- The Unicode detection is slightly improved (PR #93)
- Add syntax sugar \_\_bool\_\_ for the results CharsetMatches list-container (PR #91)

### Removed
- The project no longer raises a warning on tiny content given for detection; it will simply be logged as a warning instead (PR #92)

### Fixed
- In some rare cases, the chunks extractor could cut in the middle of a multi-byte character and mislead the mess detection (PR #95)
- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)
- The MANIFEST.in was not exhaustive (PR #78)
## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)

### Fixed
- The CLI no longer raises an unexpected exception when no encoding has been found (PR #70)
- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)
- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)
- Sub-match factoring could be wrong in rare edge cases (PR #72)
- Multiple files given to the CLI were ignored when publishing results to STDOUT. (After the first path) (PR #72)
- Fix line endings from CRLF to LF for certain project files (PR #67)

### Changed
- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)
- Allow fallback on a specified encoding if any (PR #71)

## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)

### Changed
- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63)
- According to the community wishes, the detection will fall back on ASCII or UTF-8 in a last-resort case. (PR #64)
## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)

### Fixed
- Empty/too small JSON payload mis-detection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59)

### Changed
- Don't inject unicodedata2 into sys.modules, from [@akx](https://github.com/akx) (PR #57)

## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)

### Fixed
- Make it work where there isn't a filesystem available, dropping the asset frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)
- Using explain=False permanently disabled the verbose output in the current runtime (PR #47)
- One log entry (language target preemptive) was not shown in logs when using explain=True (PR #47)
- Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52)

### Changed
- Public function normalize default argument values were not aligned with from_bytes (PR #53)

### Added
- You may now use charset aliases in the cp_isolation and cp_exclusion arguments (PR #47)
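
A minimal sketch of those isolation arguments; the payload and the chosen aliases are arbitrary examples:

```python
from charset_normalizer import from_bytes

payload = "Hæ, hvernig hefurðu það?".encode("cp1252")

# Restrict the candidate code pages; aliases such as "latin-1" are accepted.
best = from_bytes(payload, cp_isolation=["latin-1", "cp1252"]).best()
print(best.encoding if best else None)
```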
## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)

### Changed
- 4x to 5x faster than the previous 1.4.0 release. At least 2x faster than Chardet.
- The accent has been put on UTF-8 detection; it should perform rather instantaneously.
- The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible.
- The detection mechanism has been slightly improved; now Turkish content is detected correctly (most of the time)
- The program has been rewritten to ease readability and maintainability. (+ Using static typing)
- utf_7 detection has been reinstated.

### Removed
- This package no longer requires anything when used with Python 3.5 (Dropped cached_property)
- Removed support for these languages: Catalan, Esperanto, Kazakh, Basque, Volapük, Azeri, Galician, Nynorsk, Macedonian, and Serbocroatian.
- The exception hook on UnicodeDecodeError has been removed.

### Deprecated
- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0

### Fixed
- The CLI output used the relative path of the file(s). Should be absolute.

## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)

### Fixed
- Logger configuration/usage no longer conflicts with others (PR #44)
## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)

### Removed
- Using standard logging instead of using the package loguru.
- Dropping nose test framework in favor of the maintained pytest.
- Choose to not use dragonmapper package to help with gibberish Chinese/CJK text.
- Require cached_property only for Python 3.5 due to constraint. Dropping it for every other interpreter version.
- Stop support for UTF-7 that does not contain a SIG.
- Dropping PrettyTable, replaced with pure JSON output in CLI.

### Fixed
- BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present, due to the sub-match factoring process.
- Not searching properly for the BOM when trying the utf32/16 parent codec.

### Changed
- Improving the package final size by compressing frequencies.json.
- Huge improvement over the largest payloads.

### Added
- CLI now produces JSON consumable output.
- Return ASCII if given sequences fit, given reasonable confidence.
## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)

### Fixed
- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40)

## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)

### Fixed
- Empty given payload for detection may cause an exception if trying to access the `alphabets` property. (PR #39)

## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)

### Fixed
- The legacy detect function should return UTF-8-SIG if a sig is present in the payload. (PR #38)

## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)

### Changed
- Amend the previous release to allow prettytable 2.0 (PR #35)

## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)

### Fixed
- Fix error while using the package with a Python pre-release interpreter (PR #33)

### Changed
- Dependencies refactoring, constraints revised.

### Added
- Add Python 3.9 and 3.10 to the supported interpreters
MIT License

Copyright (c) 2025 TAHRI Ahmed R.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Binary file not shown.

@ -1,763 +0,0 @@
Metadata-Version: 2.1
Name: charset-normalizer
Version: 3.4.4
Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.
Author-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
Maintainer-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
License: MIT
Project-URL: Changelog, https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md
Project-URL: Documentation, https://charset-normalizer.readthedocs.io/
Project-URL: Code, https://github.com/jawah/charset_normalizer
Project-URL: Issue tracker, https://github.com/jawah/charset_normalizer/issues
Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Text Processing :: Linguistic
Classifier: Topic :: Utilities
Classifier: Typing :: Typed
Requires-Python: >=3.7
Description-Content-Type: text/markdown
License-File: LICENSE
Provides-Extra: unicode_backport
|
||||
<h1 align="center">Charset Detection, for Everyone 👋</h1>
|
||||
|
||||
<p align="center">
|
||||
<sup>The Real First Universal Charset Detector</sup><br>
|
||||
<a href="https://pypi.org/project/charset-normalizer">
|
||||
<img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
|
||||
</a>
|
||||
<a href="https://pepy.tech/project/charset-normalizer/">
|
||||
<img alt="Download Count Total" src="https://static.pepy.tech/badge/charset-normalizer/month" />
|
||||
</a>
|
||||
<a href="https://bestpractices.coreinfrastructure.org/projects/7297">
|
||||
<img src="https://bestpractices.coreinfrastructure.org/projects/7297/badge">
|
||||
</a>
|
||||
</p>
|
||||
<p align="center">
|
||||
<sup><i>Featured Packages</i></sup><br>
|
||||
<a href="https://github.com/jawah/niquests">
|
||||
<img alt="Static Badge" src="https://img.shields.io/badge/Niquests-Most_Advanced_HTTP_Client-cyan">
|
||||
</a>
|
||||
<a href="https://github.com/jawah/wassima">
|
||||
<img alt="Static Badge" src="https://img.shields.io/badge/Wassima-Certifi_Replacement-cyan">
|
||||
</a>
|
||||
</p>
|
||||
<p align="center">
|
||||
<sup><i>In other language (unofficial port - by the community)</i></sup><br>
|
||||
<a href="https://github.com/nickspring/charset-normalizer-rs">
|
||||
<img alt="Static Badge" src="https://img.shields.io/badge/Rust-red">
|
||||
</a>
|
||||
</p>
|
||||
|
||||
> A library that helps you read text from an unknown charset encoding.<br /> Motivated by `chardet`,
|
||||
> I'm trying to resolve the issue by taking a new approach.
|
||||
> All IANA character set names for which the Python core library provides codecs are supported.
|
||||
|
||||
<p align="center">
|
||||
>>>>> <a href="https://charsetnormalizerweb.ousret.now.sh" target="_blank">👉 Try Me Online Now, Then Adopt Me 👈 </a> <<<<<
|
||||
</p>
|
||||
|
||||
This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.
|
||||
|
||||
| Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |
|
||||
|--------------------------------------------------|:---------------------------------------------:|:--------------------------------------------------------------------------------------------------:|:-----------------------------------------------:|
|
||||
| `Fast` | ❌ | ✅ | ✅ |
|
||||
| `Universal**` | ❌ | ✅ | ❌ |
|
||||
| `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ |
|
||||
| `Reliable` **with** distinguishable standards | ✅ | ✅ | ✅ |
|
||||
| `License` | LGPL-2.1<br>_restrictive_ | MIT | MPL-1.1<br>_restrictive_ |
|
||||
| `Native Python` | ✅ | ✅ | ❌ |
|
||||
| `Detect spoken language` | ❌ | ✅ | N/A |
|
||||
| `UnicodeDecodeError Safety` | ❌ | ✅ | ❌ |
|
||||
| `Whl Size (min)` | 193.6 kB | 42 kB | ~200 kB |
|
||||
| `Supported Encoding` | 33 | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |
|
||||
|
||||
<p align="center">
|
||||
<img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>
|
||||
</p>
|
||||
|
||||
*\*\* : They are clearly using specific code for a specific encoding even if covering most of used one*<br>
|
||||
|
||||
## ⚡ Performance
|
||||
|
||||
This package offer better performance than its counterpart Chardet. Here are some numbers.
|
||||
|
||||
| Package | Accuracy | Mean per file (ms) | File per sec (est) |
|
||||
|-----------------------------------------------|:--------:|:------------------:|:------------------:|
|
||||
| [chardet](https://github.com/chardet/chardet) | 86 % | 63 ms | 16 file/sec |
|
||||
| charset-normalizer | **98 %** | **10 ms** | 100 file/sec |
|
||||
|
||||
| Package | 99th percentile | 95th percentile | 50th percentile |
|
||||
|-----------------------------------------------|:---------------:|:---------------:|:---------------:|
|
||||
| [chardet](https://github.com/chardet/chardet) | 265 ms | 71 ms | 7 ms |
|
||||
| charset-normalizer | 100 ms | 50 ms | 5 ms |
|
||||
|
||||
_updated as of december 2024 using CPython 3.12_
|
||||
|
||||
Chardet's performance on larger file (1MB+) are very poor. Expect huge difference on large payload.
|
||||
|
||||
> Stats are generated using 400+ files using default parameters. More details on used files, see GHA workflows.
|
||||
> And yes, these results might change at any time. The dataset can be updated to include more files.
|
||||
> The actual delays heavily depends on your CPU capabilities. The factors should remain the same.
|
||||
> Keep in mind that the stats are generous and that Chardet accuracy vs our is measured using Chardet initial capability
|
||||
> (e.g. Supported Encoding) Challenge-them if you want.
|
||||
|
||||
## ✨ Installation
|
||||
|
||||
Using pip:
|
||||
|
||||
```sh
|
||||
pip install charset-normalizer -U
|
||||
```
|
||||
|
||||
## 🚀 Basic Usage
|
||||
|
||||
### CLI
|
||||
This package comes with a CLI.
|
||||
|
||||
```
|
||||
usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
|
||||
file [file ...]
|
||||
|
||||
The Real First Universal Charset Detector. Discover originating encoding used
|
||||
on text file. Normalize text to unicode.
|
||||
|
||||
positional arguments:
|
||||
files File(s) to be analysed
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
-v, --verbose Display complementary information about file if any.
|
||||
Stdout will contain logs about the detection process.
|
||||
-a, --with-alternative
|
||||
Output complementary possibilities if any. Top-level
|
||||
JSON WILL be a list.
|
||||
-n, --normalize Permit to normalize input file. If not set, program
|
||||
does not write anything.
|
||||
-m, --minimal Only output the charset detected to STDOUT. Disabling
|
||||
JSON output.
|
||||
-r, --replace Replace file when trying to normalize it instead of
|
||||
creating a new one.
|
||||
-f, --force Replace file without asking if you are sure, use this
|
||||
flag with caution.
|
||||
-t THRESHOLD, --threshold THRESHOLD
|
||||
Define a custom maximum amount of chaos allowed in
|
||||
decoded content. 0. <= chaos <= 1.
|
||||
--version Show version information and exit.
|
||||
```
|
||||
|
||||
```bash
|
||||
normalizer ./data/sample.1.fr.srt
|
||||
```
|
||||
|
||||
or
|
||||
|
||||
```bash
|
||||
python -m charset_normalizer ./data/sample.1.fr.srt
|
||||
```
|
||||
|
||||
🎉 Since version 1.4.0 the CLI produce easily usable stdout result in JSON format.
|
||||
|
||||
```json
|
||||
{
|
||||
"path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
|
||||
"encoding": "cp1252",
|
||||
"encoding_aliases": [
|
||||
"1252",
|
||||
"windows_1252"
|
||||
],
|
||||
"alternative_encodings": [
|
||||
"cp1254",
|
||||
"cp1256",
|
||||
"cp1258",
|
||||
"iso8859_14",
|
||||
"iso8859_15",
|
||||
"iso8859_16",
|
||||
"iso8859_3",
|
||||
"iso8859_9",
|
||||
"latin_1",
|
||||
"mbcs"
|
||||
],
|
||||
"language": "French",
|
||||
"alphabets": [
|
||||
"Basic Latin",
|
||||
"Latin-1 Supplement"
|
||||
],
|
||||
"has_sig_or_bom": false,
|
||||
"chaos": 0.149,
|
||||
"coherence": 97.152,
|
||||
"unicode_path": null,
|
||||
"is_preferred": true
|
||||
}
|
||||
```
|
||||
|
||||
### Python
|
||||
*Just print out normalized text*
|
||||
```python
|
||||
from charset_normalizer import from_path
|
||||
|
||||
results = from_path('./my_subtitle.srt')
|
||||
|
||||
print(str(results.best()))
|
||||
```
|
||||
|
||||
*Upgrade your code without effort*
|
||||
```python
|
||||
from charset_normalizer import detect
|
||||
```
|
||||
|
||||
The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.
|
||||
|
||||
See the docs for advanced usage : [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)
|
||||
|
||||
## 😇 Why
|
||||
|
||||
When I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a
|
||||
reliable alternative using a completely different method. Also! I never back down on a good challenge!
|
||||
|
||||
I **don't care** about the **originating charset** encoding, because **two different tables** can
|
||||
produce **two identical rendered string.**
|
||||
What I want is to get readable text, the best I can.
|
||||
|
||||
In a way, **I'm brute forcing text decoding.** How cool is that ? 😎
|
||||
|
||||
Don't confuse package **ftfy** with charset-normalizer or chardet. ftfy goal is to repair Unicode string whereas charset-normalizer to convert raw file in unknown encoding to unicode.
|
||||
|
||||
## 🍰 How
|
||||
|
||||
- Discard all charset encoding table that could not fit the binary content.
|
||||
- Measure noise, or the mess once opened (by chunks) with a corresponding charset encoding.
|
||||
- Extract matches with the lowest mess detected.
|
||||
- Additionally, we measure coherence / probe for a language.
|
||||
|
||||
**Wait a minute**, what is noise/mess and coherence according to **YOU ?**
|
||||
|
||||
*Noise :* I opened hundred of text files, **written by humans**, with the wrong encoding table. **I observed**, then
|
||||
**I established** some ground rules about **what is obvious** when **it seems like** a mess (aka. defining noise in rendered text).
|
||||
I know that my interpretation of what is noise is probably incomplete, feel free to contribute in order to
|
||||
improve or rewrite it.
|
||||
|
||||
*Coherence :* For each language there is on earth, we have computed ranked letter appearance occurrences (the best we can). So I thought
|
||||
that intel is worth something here. So I use those records against decoded text to check if I can detect intelligent design.
|
||||
|
||||
## ⚡ Known limitations
|
||||
|
||||
- Language detection is unreliable when text contains two or more languages sharing identical letters. (eg. HTML (english tags) + Turkish content (Sharing Latin characters))
|
||||
- Every charset detector heavily depends on sufficient content. In common cases, do not bother run detection on very tiny content.
|
||||
|
||||
## ⚠️ About Python EOLs
|
||||
|
||||
**If you are running:**
|
||||
|
||||
- Python >=2.7,<3.5: Unsupported
|
||||
- Python 3.5: charset-normalizer < 2.1
|
||||
- Python 3.6: charset-normalizer < 3.1
|
||||
- Python 3.7: charset-normalizer < 4.0
|
||||
|
||||
Upgrade your Python interpreter as soon as possible.
|
||||
|
||||
## 👤 Contributing
|
||||
|
||||
Contributions, issues and feature requests are very much welcome.<br />
|
||||
Feel free to check [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.
|
||||
|
||||
## 📝 License
|
||||
|
||||
Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).<br />
|
||||
This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.
|
||||
|
||||
Characters frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/)
|
||||
|
||||
## 💼 For Enterprise
|
||||
|
||||
Professional support for charset-normalizer is available as part of the [Tidelift
|
||||
Subscription][1]. Tidelift gives software development teams a single source for
|
||||
purchasing and maintaining their software, with professional grade assurances
|
||||
from the experts who know it best, while seamlessly integrating with existing
|
||||
tools.
|
||||
|
||||
[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme
|
||||
|
||||
[](https://www.bestpractices.dev/projects/7297)
|
||||
|
||||
# Changelog
|
||||
All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
||||
|
||||
## [3.4.4](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.4) (2025-10-13)
|
||||
|
||||
### Changed
|
||||
- Bound `setuptools` to a specific constraint `setuptools>=68,<=81`.
|
||||
- Raised upper bound of mypyc for the optional pre-built extension to v1.18.2
|
||||
|
||||
### Removed
|
||||
- `setuptools-scm` as a build dependency.
|
||||
|
||||
### Misc
|
||||
- Enforced hashes in `dev-requirements.txt` and created `ci-requirements.txt` for security purposes.
|
||||
- Additional pre-built wheels for riscv64, s390x, and armv7l architectures.
|
||||
- Restore ` multiple.intoto.jsonl` in GitHub releases in addition to individual attestation file per wheel.
|
||||
|
||||
## [3.4.3](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.3) (2025-08-09)
|
||||
|
||||
### Changed
|
||||
- mypy(c) is no longer a required dependency at build time if `CHARSET_NORMALIZER_USE_MYPYC` isn't set to `1`. (#595) (#583)
|
||||
- automatically lower confidence on small bytes samples that are not Unicode in `detect` output legacy function. (#391)
|
||||
|
||||
### Added
|
||||
- Custom build backend to overcome inability to mark mypy as an optional dependency in the build phase.
|
||||
- Support for Python 3.14
|
||||
|
||||
### Fixed
|
||||
- sdist archive contained useless directories.
|
||||
- automatically fallback on valid UTF-16 or UTF-32 even if the md says it's noisy. (#633)
|
||||
|
||||
### Misc
|
||||
- SBOM are automatically published to the relevant GitHub release to comply with regulatory changes.
|
||||
Each published wheel comes with its SBOM. We choose CycloneDX as the format.
|
||||
- Prebuilt optimized wheel are no longer distributed by default for CPython 3.7 due to a change in cibuildwheel.
|
||||
|
||||
## [3.4.2](https://github.com/Ousret/charset_normalizer/compare/3.4.1...3.4.2) (2025-05-02)
|
||||
|
||||
### Fixed
|
||||
- Addressed the DeprecationWarning in our CLI regarding `argparse.FileType` by backporting the target class into the package. (#591)
|
||||
- Improved the overall reliability of the detector with CJK Ideographs. (#605) (#587)
|
||||
|
||||
### Changed
|
||||
- Optional mypyc compilation upgraded to version 1.15 for Python >= 3.8
|
||||
|
||||
## [3.4.1](https://github.com/Ousret/charset_normalizer/compare/3.4.0...3.4.1) (2024-12-24)

### Changed

- Project metadata are now stored in `pyproject.toml` instead of `setup.cfg`, with setuptools as the build backend.
- Enforced delayed annotation loading for simpler, consistent types across the project.
- Optional mypyc compilation upgraded to version 1.14 for Python >= 3.8.

### Added

- pre-commit configuration.
- noxfile.

### Removed

- `build-requirements.txt`, superseded by the `pyproject.toml` native build configuration.
- `bin/integration.py` and `bin/serve.py` in favor of a downstream integration test (see the noxfile).
- `setup.cfg` in favor of `pyproject.toml` metadata configuration.
- Unused `utils.range_scan` function.

### Fixed

- Converting content to Unicode bytes may insert `utf_8` instead of the preferred `utf-8`. (#572)
- Deprecation warning "'count' is passed as positional argument" when converting to Unicode bytes on Python 3.13+.

## [3.4.0](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) (2024-10-08)

### Added

- Argument `--no-preemptive` in the CLI to prevent the detector from searching for hints.
- Support for Python 3.13. (#512)

### Fixed

- Relaxed the TypeError exception thrown when trying to compare a CharsetMatch with anything other than a CharsetMatch.
- Improved the general reliability of the detector based on user feedback. (#520) (#509) (#498) (#407) (#537)
- Declared charset in content (preemptive detection) not changed when converting to utf-8 bytes. (#381)

## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31)

### Fixed

- Unintentional memory usage regression when using large payloads that match several encodings. (#376)
- Regression in some detection cases showcased in the documentation. (#371)

### Added

- Noise (md) probe that identifies malformed Arabic representation due to the presence of letters in isolated form. (credit to my wife)

## [3.3.1](https://github.com/Ousret/charset_normalizer/compare/3.3.0...3.3.1) (2023-10-22)

### Changed

- Optional mypyc compilation upgraded to version 1.6.1 for Python >= 3.8.
- Improved the general detection reliability based on reports from the community.

## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30)

### Added

- Allow executing the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer`. (see the example below)
- Support for 9 forgotten encodings that are supported by Python but unlisted in `encoding.aliases`, as they have no alias. (#323)

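Both module invocations reach the same CLI entry point as the installed `normalizer` script; a quick illustration (the file path is a placeholder):

```sh
python -m charset_normalizer.cli ./my_subtitle.srt
python -m charset_normalizer ./my_subtitle.srt
# both are equivalent to the installed console script:
normalizer ./my_subtitle.srt
```
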
### Removed

- (internal) Redundant `utils.is_ascii` function and unused function `is_private_use_only`.
- (internal) `charset_normalizer.assets` is moved inside `charset_normalizer.constant`.

### Changed

- (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection.
- Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.8.

### Fixed

- Unable to properly sort CharsetMatch when both chaos/noise and coherence were close, due to an unreachable condition in `__lt__`. (#350)

## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)

### Changed

- The type hint for function `from_path` no longer enforces `PathLike` as its first argument.
- Minor improvement to the global detection reliability.

### Added

- Introduced function `is_binary`, which relies on the main capabilities and is optimized to detect binaries. (see the sketch below)
- Propagated the `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp`, allowing deeper control over the detection (default True).
- Explicit support for Python 3.12.

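A minimal usage sketch of those two additions, assuming only the public API named above; the byte payloads are arbitrary placeholders:

```python
from charset_normalizer import from_bytes, is_binary

# is_binary accepts a raw payload (a path or file pointer also works)
print(is_binary(b"\x00\x5f\x2b\x1f"))  # binary-looking content -> True
print(is_binary(b"hello, world"))      # plain text -> False

# enable_fallback=False suppresses the last-resort fallback matches
results = from_bytes("hello, world".encode("utf_8"), enable_fallback=False)
print(results.best())
```
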
### Fixed

- Edge-case detection failure where a file would contain a 'very-long' camel-cased word. (Issue #289)

## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)

### Added

- Argument `should_rename_legacy` for the legacy function `detect`, which now also disregards any unknown keyword arguments without raising. (PR #262) (see the sketch below)

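A small sketch of that flag; exact outputs depend on the detector, and the legacy-name renaming shown in the comments is our reading of PR #262:

```python
from charset_normalizer import detect

payload = "你好，世界".encode("gb2312")  # GB2312 is a legacy subset of GB18030

print(detect(payload)["encoding"])
# With should_rename_legacy=True, legacy encodings are reported under their
# modern superset name (e.g. GB18030 instead of GB2312) where applicable.
print(detect(payload, should_rename_legacy=True)["encoding"])
```
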
### Removed

- Support for Python 3.6. (PR #260)

### Changed

- Optional speedup provided by mypy/c 1.0.1.

## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)

### Fixed

- The multi-byte cutter/chunk generator did not always cut correctly. (PR #233)

### Changed

- Speedup provided by mypy/c 0.990 on Python >= 3.7.

## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)

### Added

- Extended the capability of explain=True: when cp_isolation contains at most two entries (min. one), the Mess-detector results are logged in detail.
- Support for an alternative language frequency set in charset_normalizer.assets.FREQUENCIES.
- Parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio. (see the sketch below)
- `normalizer --version` now specifies whether the current version provides the extra speedup (meaning a mypyc-compiled wheel).

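A hedged sketch combining these knobs; the sample text and codepage choices are arbitrary:

```python
from charset_normalizer import from_bytes

payload = "Ceci est un exemple de texte français.".encode("cp1252")

results = from_bytes(
    payload,
    cp_isolation=["cp1252", "latin_1"],  # restrict candidates; two at most, so explain logs MD details
    language_threshold=0.1,              # minimum expected coherence ratio
    explain=True,
)
print(results.best())
```
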
### Changed

- Build with static metadata using the 'build' frontend.
- Made the language detection stricter.
- Optional: module `md.py` can be compiled using mypyc to provide an extra speedup, up to 4x faster than v2.1.

### Fixed

- CLI with the --normalize option failed when using a full path for files.
- TooManyAccentuatedPlugin induced false positives on the mess detection when too few alpha characters had been fed to it.
- Sphinx warnings when generating the documentation.

### Removed

- The coherence detector no longer returns 'Simple English'; it returns 'English' instead.
- The coherence detector no longer returns 'Classical Chinese'; it returns 'Chinese' instead.
- Breaking: methods `first()` and `best()` from CharsetMatch.
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (it is unreliable and conflicts with ASCII).
- Breaking: class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches.
- Breaking: top-level function `normalize`.
- Breaking: properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch.
- Support for the backport `unicodedata2`.

## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)

### Added

- Extended the capability of explain=True: when cp_isolation contains at most two entries (min. one), the Mess-detector results are logged in detail.
- Support for an alternative language frequency set in charset_normalizer.assets.FREQUENCIES.
- Parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio.

### Changed

- Build with static metadata using the 'build' frontend.
- Made the language detection stricter.

### Fixed

- CLI with the --normalize option failed when using a full path for files.
- TooManyAccentuatedPlugin induced false positives on the mess detection when too few alpha characters had been fed to it.

### Removed

- The coherence detector no longer returns 'Simple English'; it returns 'English' instead.
- The coherence detector no longer returns 'Classical Chinese'; it returns 'Chinese' instead.

## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)

### Added

- `normalizer --version` now specifies whether the current version provides the extra speedup (meaning a mypyc-compiled wheel).

### Removed

- Breaking: methods `first()` and `best()` from CharsetMatch.
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (it is unreliable and conflicts with ASCII).

### Fixed

- Sphinx warnings when generating the documentation.

## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)

### Changed

- Optional: module `md.py` can be compiled using mypyc to provide an extra speedup, up to 4x faster than v2.1.

### Removed

- Breaking: class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches.
- Breaking: top-level function `normalize`.
- Breaking: properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch.
- Support for the backport `unicodedata2`.

## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)

### Deprecated

- Function `normalize`, scheduled for removal in 3.0.

### Changed

- Removed a useless call to decode in the function is_unprintable. (#206)

### Fixed

- Third-party library (i18n xgettext) crashing by not recognizing utf_8 (PEP 263) with an underscore, from [@aleksandernovikov](https://github.com/aleksandernovikov). (#204)

## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)

### Added

- Output the Unicode table version when running the CLI with `--version`. (PR #194)

### Changed

- Re-use the decoded buffer for single-byte character sets, from [@nijel](https://github.com/nijel). (PR #175)
- Fixed some performance bottlenecks, from [@deedy5](https://github.com/deedy5). (PR #183)

### Fixed

- Worked around a potential bug in CPython where Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1, is not acknowledged as a space. (PR #175)
- CLI default threshold aligned with the API threshold, from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko). (PR #181)

### Removed

- Support for Python 3.5. (PR #192)

### Deprecated

- Use of the backport unicodedata from `unicodedata2`, as Python is quickly catching up; scheduled for removal in 3.0. (PR #194)

## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)

### Fixed

- ASCII mis-detection in rare cases. (PR #170)

## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)

### Added

- Explicit support for Python 3.11. (PR #164)

### Changed

- The logging behavior has been completely reviewed, now using only TRACE and DEBUG levels. (PR #163 #165)

## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)

### Fixed

- Fallback match entries might lead to UnicodeDecodeError for large byte sequences. (PR #154)

### Changed

- Skip the language detection (CD) on ASCII. (PR #155)

## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)

### Changed

- Moderated the logging impact (since 2.0.8) for specific environments. (PR #147)

### Fixed

- Wrong logging level applied when setting the kwarg `explain` to True. (PR #146)

## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)

### Changed

- Improvement to Vietnamese detection. (PR #126)
- MD improvement on trailing data and long foreign (non-pure-Latin) data. (PR #124)
- Efficiency improvements in cd/alphabet_languages, from [@adbar](https://github.com/adbar). (PR #122)
- Call sum() without an intermediary list, following PEP 289 recommendations, from [@adbar](https://github.com/adbar). (PR #129)
- Code style as refactored by Sourcery-AI. (PR #131)
- Minor adjustment of the MD around European words. (PR #133)
- Removed and replaced SRTs from assets / tests. (PR #139)
- Initialize the library logger with a `NullHandler` by default, from [@nmaynes](https://github.com/nmaynes). (PR #135)
- Setting the kwarg `explain` to True will provisionally add (bounded to the function lifespan) a specific stream handler. (PR #135)

### Fixed

- Fix a large (misleading) sequence giving UnicodeDecodeError. (PR #137)
- Avoid using too-insignificant chunks. (PR #137)

### Added

- Added and exposed function `set_logging_handler` to configure a specific StreamHandler, from [@nmaynes](https://github.com/nmaynes). (PR #135) (see the sketch below)
- Added `CHANGELOG.md` entries; the format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). (PR #141)

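A brief illustration of wiring up that handler, assuming the signature documented for `set_logging_handler` (the level and format string here are arbitrary):

```python
import logging

from charset_normalizer import from_bytes, set_logging_handler

# Route the library's logs to a StreamHandler with a custom format.
set_logging_handler(
    level=logging.DEBUG,
    format_string="%(asctime)s | %(levelname)s | %(message)s",
)

from_bytes("Exemple de texte.".encode("cp1252")).best()
```
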
## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)

### Added

- Support for Kazakh (Cyrillic) language detection. (PR #109)

### Changed

- Further improved inferring the language from a given single-byte code page. (PR #112)
- Vainly try to leverage PEP 263 when PEP 3120 is not supported. (PR #116)
- Refactoring for potential performance improvements in loops, from [@adbar](https://github.com/adbar). (PR #113)
- Various detection improvements (MD+CD). (PR #117)

### Removed

- Removed a redundant logging entry about detected language(s). (PR #115)

### Fixed

- Fixed a minor inconsistency between Python 3.5 and other versions regarding language detection. (PR #117 #102)

## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)

### Fixed

- Unforeseen regression with the loss of backward compatibility with some older minor releases of Python 3.5.x. (PR #100)
- Fixed a CLI crash when using --minimal output in certain cases. (PR #103)

### Changed

- Minor improvement to the detection efficiency (less than 1%). (PR #106 #101)

## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)

### Changed

- The project now complies with flake8, mypy, isort and black to ensure better overall quality. (PR #81)
- The BC support with v1.x was improved; the old staticmethods are restored. (PR #82)
- The Unicode detection is slightly improved. (PR #93)
- Added syntactic sugar `__bool__` for the results CharsetMatches list-container. (PR #91)

### Removed

- The project no longer raises a warning on tiny content given for detection; it is simply logged as a warning instead. (PR #92)

### Fixed

- In some rare cases, the chunk extractor could cut in the middle of a multi-byte character and mislead the mess detection. (PR #95)
- Some rare 'space' characters could trip up the UnprintablePlugin/mess detection. (PR #96)
- The MANIFEST.in was not exhaustive. (PR #78)

## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)

### Fixed

- The CLI no longer raises an unexpected exception when no encoding has been found. (PR #70)
- Fixed accessing the 'alphabets' property when the payload contains surrogate characters. (PR #68)
- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match. (PR #72)
- Submatch factoring could be wrong in rare edge cases. (PR #72)
- Multiple files given to the CLI were ignored (after the first path) when publishing results to STDOUT. (PR #72)
- Fixed line endings from CRLF to LF for certain project files. (PR #67)

### Changed

- Adjusted the MD to lower the sensitivity, thus improving the global detection reliability. (PR #69 #76)
- Allow fallback on the specified encoding, if any. (PR #71)

## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)

### Changed

- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results, especially for ASCII. (PR #63)
- In line with community wishes, the detection will fall back on ASCII or UTF-8 as a last resort. (PR #64)

## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)

### Fixed

- Fixed mis-detection of empty/too-small JSON payloads. Report from [@tseaver](https://github.com/tseaver). (PR #59)

### Changed

- Don't inject unicodedata2 into sys.modules, from [@akx](https://github.com/akx). (PR #57)

## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)

### Fixed

- Make it work where there isn't a filesystem available by dropping assets/frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)
- Using explain=False permanently disabled the verbose output in the current runtime. (PR #47)
- One log entry (language target preemptive) was not shown in logs when using explain=True. (PR #47)
- Fixed an undesired exception (ValueError) on getitem of a CharsetMatches instance. (PR #52)

### Changed

- The public function normalize's default argument values were not aligned with from_bytes. (PR #53)

### Added

- You may now use charset aliases in the cp_isolation and cp_exclusion arguments. (PR #47)

## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)

### Changed

- 4x to 5x faster than the previous 1.4.0 release. At least 2x faster than Chardet.
- The focus has been put on UTF-8 detection, which should perform nearly instantaneously.
- The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible.
- The detection mechanism has been slightly improved; Turkish content is now detected correctly (most of the time).
- The program has been rewritten for readability and maintainability (using static typing).
- utf_7 detection has been reinstated.

### Removed

- This package no longer requires anything when used with Python 3.5 (dropped cached_property).
- Removed support for these languages: Catalan, Esperanto, Kazakh, Basque, Volapük, Azeri, Galician, Nynorsk, Macedonian, and Serbocroatian.
- The exception hook on UnicodeDecodeError has been removed.

### Deprecated

- Methods coherence_non_latin, w_counter and chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0.

### Fixed

- The CLI output used the relative path of the file(s); it should be absolute.

## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)

### Fixed

- Logger configuration/usage no longer conflicts with others. (PR #44)

## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)

### Removed

- Now using standard logging instead of the loguru package.
- Dropped the nose test framework in favor of the maintained pytest.
- Chose not to use the dragonmapper package to help with gibberish Chinese/CJK text.
- Require cached_property only for Python 3.5 due to a constraint; dropped it for every other interpreter version.
- Stopped supporting UTF-7 that does not contain a SIG.
- Dropped PrettyTable, replaced with pure JSON output in the CLI.

### Fixed

- The BOM marker in a CharsetNormalizerMatch instance could be False in rare cases, even if obviously present, due to the sub-match factoring process.
- Not searching properly for the BOM when trying the utf32/16 parent codec.

### Changed

- Improved the final package size by compressing frequencies.json.
- Huge improvement on the largest payloads.

### Added

- The CLI now produces JSON-consumable output.
- Return ASCII if the given sequences fit, given reasonable confidence.

## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)

### Fixed

- In some very rare cases, you could end up getting encode/decode errors due to a bad bytes payload. (PR #40)

## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)

### Fixed

- An empty payload given for detection could cause an exception when accessing the `alphabets` property. (PR #39)

## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)

### Fixed

- The legacy detect function should return UTF-8-SIG if a sig is present in the payload. (PR #38)

## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)

### Changed

- Amended the previous release to allow prettytable 2.0. (PR #35)

## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)

### Fixed

- Fixed an error when using the package with a Python pre-release interpreter. (PR #33)

### Changed

- Dependencies refactored, constraints revised.

### Added

- Added Python 3.9 and 3.10 to the supported interpreters.

MIT License

Copyright (c) 2025 TAHRI Ahmed R.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

Binary file not shown.

@ -1,763 +0,0 @@

Metadata-Version: 2.1
Name: charset-normalizer
Version: 3.4.4
Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.
Author-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
Maintainer-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
License: MIT
Project-URL: Changelog, https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md
Project-URL: Documentation, https://charset-normalizer.readthedocs.io/
Project-URL: Code, https://github.com/jawah/charset_normalizer
Project-URL: Issue tracker, https://github.com/jawah/charset_normalizer/issues
Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Text Processing :: Linguistic
Classifier: Topic :: Utilities
Classifier: Typing :: Typed
Requires-Python: >=3.7
Description-Content-Type: text/markdown
License-File: LICENSE
Provides-Extra: unicode_backport

<h1 align="center">Charset Detection, for Everyone 👋</h1>

<p align="center">
  <sup>The Real First Universal Charset Detector</sup><br>
  <a href="https://pypi.org/project/charset-normalizer">
    <img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
  </a>
  <a href="https://pepy.tech/project/charset-normalizer/">
    <img alt="Download Count Total" src="https://static.pepy.tech/badge/charset-normalizer/month" />
  </a>
  <a href="https://bestpractices.coreinfrastructure.org/projects/7297">
    <img src="https://bestpractices.coreinfrastructure.org/projects/7297/badge">
  </a>
</p>
<p align="center">
  <sup><i>Featured Packages</i></sup><br>
  <a href="https://github.com/jawah/niquests">
    <img alt="Static Badge" src="https://img.shields.io/badge/Niquests-Most_Advanced_HTTP_Client-cyan">
  </a>
  <a href="https://github.com/jawah/wassima">
    <img alt="Static Badge" src="https://img.shields.io/badge/Wassima-Certifi_Replacement-cyan">
  </a>
</p>
<p align="center">
  <sup><i>In other languages (unofficial ports by the community)</i></sup><br>
  <a href="https://github.com/nickspring/charset-normalizer-rs">
    <img alt="Static Badge" src="https://img.shields.io/badge/Rust-red">
  </a>
</p>

> A library that helps you read text from an unknown charset encoding.<br /> Motivated by `chardet`,
> I'm trying to resolve the issue by taking a new approach.
> All IANA character set names for which the Python core library provides codecs are supported.

<p align="center">
  >>>>> <a href="https://charsetnormalizerweb.ousret.now.sh" target="_blank">👉 Try Me Online Now, Then Adopt Me 👈 </a> <<<<<
</p>

This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.

| Feature                                          | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer                                                                                 | [cChardet](https://github.com/PyYoshi/cChardet) |
|--------------------------------------------------|:---------------------------------------------:|:--------------------------------------------------------------------------------------------------:|:-----------------------------------------------:|
| `Fast`                                           | ❌                                            | ✅                                                                                                  | ✅                                              |
| `Universal**`                                    | ❌                                            | ✅                                                                                                  | ❌                                              |
| `Reliable` **without** distinguishable standards | ❌                                            | ✅                                                                                                  | ✅                                              |
| `Reliable` **with** distinguishable standards    | ✅                                            | ✅                                                                                                  | ✅                                              |
| `License`                                        | LGPL-2.1<br>_restrictive_                     | MIT                                                                                                | MPL-1.1<br>_restrictive_                        |
| `Native Python`                                  | ✅                                            | ✅                                                                                                  | ❌                                              |
| `Detect spoken language`                         | ❌                                            | ✅                                                                                                  | N/A                                             |
| `UnicodeDecodeError Safety`                      | ❌                                            | ✅                                                                                                  | ❌                                              |
| `Whl Size (min)`                                 | 193.6 kB                                      | 42 kB                                                                                              | ~200 kB                                         |
| `Supported Encoding`                             | 33                                            | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40                                              |

<p align="center">
<img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>
</p>

*\*\* : They clearly use specific code for a specific encoding, even if it covers most of the encodings in use.*<br>

## ⚡ Performance

This package offers better performance than its counterpart Chardet. Here are some numbers.

| Package                                       | Accuracy | Mean per file (ms) | File per sec (est) |
|-----------------------------------------------|:--------:|:------------------:|:------------------:|
| [chardet](https://github.com/chardet/chardet) | 86 %     | 63 ms              | 16 file/sec        |
| charset-normalizer                            | **98 %** | **10 ms**          | 100 file/sec       |

| Package                                       | 99th percentile | 95th percentile | 50th percentile |
|-----------------------------------------------|:---------------:|:---------------:|:---------------:|
| [chardet](https://github.com/chardet/chardet) | 265 ms          | 71 ms           | 7 ms            |
| charset-normalizer                            | 100 ms          | 50 ms           | 5 ms            |

_updated as of December 2024 using CPython 3.12_

Chardet's performance on larger files (1MB+) is very poor. Expect a huge difference on large payloads.

> Stats are generated using 400+ files with the default parameters. For more details on the files used, see the GHA workflows.
> And yes, these results might change at any time. The dataset can be updated to include more files.
> The actual delays depend heavily on your CPU capabilities. The factors should remain the same.
> Keep in mind that the stats are generous and that Chardet's accuracy vs. ours is measured using Chardet's initial capability
> (e.g. supported encodings). Challenge them if you want.

## ✨ Installation

Using pip:

```sh
pip install charset-normalizer -U
```

## 🚀 Basic Usage

### CLI

This package comes with a CLI.

```
usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
                  file [file ...]

The Real First Universal Charset Detector. Discover originating encoding used
on text file. Normalize text to unicode.

positional arguments:
  files                 File(s) to be analysed

optional arguments:
  -h, --help            show this help message and exit
  -v, --verbose         Display complementary information about file if any.
                        Stdout will contain logs about the detection process.
  -a, --with-alternative
                        Output complementary possibilities if any. Top-level
                        JSON WILL be a list.
  -n, --normalize       Permit to normalize input file. If not set, program
                        does not write anything.
  -m, --minimal         Only output the charset detected to STDOUT. Disabling
                        JSON output.
  -r, --replace         Replace file when trying to normalize it instead of
                        creating a new one.
  -f, --force           Replace file without asking if you are sure, use this
                        flag with caution.
  -t THRESHOLD, --threshold THRESHOLD
                        Define a custom maximum amount of chaos allowed in
                        decoded content. 0. <= chaos <= 1.
  --version             Show version information and exit.
```

```bash
normalizer ./data/sample.1.fr.srt
```

or

```bash
python -m charset_normalizer ./data/sample.1.fr.srt
```

🎉 Since version 1.4.0, the CLI produces an easily usable stdout result in JSON format.

```json
{
    "path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
    "encoding": "cp1252",
    "encoding_aliases": [
        "1252",
        "windows_1252"
    ],
    "alternative_encodings": [
        "cp1254",
        "cp1256",
        "cp1258",
        "iso8859_14",
        "iso8859_15",
        "iso8859_16",
        "iso8859_3",
        "iso8859_9",
        "latin_1",
        "mbcs"
    ],
    "language": "French",
    "alphabets": [
        "Basic Latin",
        "Latin-1 Supplement"
    ],
    "has_sig_or_bom": false,
    "chaos": 0.149,
    "coherence": 97.152,
    "unicode_path": null,
    "is_preferred": true
}
```

### Python

*Just print out the normalized text*

```python
from charset_normalizer import from_path

results = from_path('./my_subtitle.srt')

print(str(results.best()))
```

*Upgrade your code without effort*

```python
from charset_normalizer import detect
```

The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) backward-compatible result possible.

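For instance, the drop-in `detect` returns the familiar chardet-style dictionary; the payload below is an arbitrary cp1252 sample:

```python
from charset_normalizer import detect

result = detect("Comment ça va ?".encode("cp1252"))
# The same keys chardet exposes: 'encoding', 'language' and 'confidence'.
print(result["encoding"], result["confidence"])
```
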
See the docs for advanced usage: [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)

## 😇 Why

When I started using Chardet, I noticed that it did not meet my expectations, and I wanted to propose a
reliable alternative using a completely different method. Also! I never back down from a good challenge!

I **don't care** about the **originating charset** encoding, because **two different tables** can
produce **two identical rendered strings.**
What I want is to get readable text, the best I can.

In a way, **I'm brute-forcing text decoding.** How cool is that? 😎

Don't confuse the **ftfy** package with charset-normalizer or chardet. ftfy's goal is to repair Unicode strings, whereas charset-normalizer's is to convert a raw file in an unknown encoding to Unicode.

## 🍰 How

- Discard all charset encoding tables that could not fit the binary content.
- Measure the noise, or the mess, once opened (by chunks) with a corresponding charset encoding.
- Extract the matches with the lowest mess detected.
- Additionally, we measure coherence / probe for a language. (A rough sketch of this pipeline follows below.)

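To make the pipeline concrete, here is a deliberately simplified sketch; `CANDIDATES`, `mess_score` and `sketch_detect` are illustrative stand-ins, not the library's real internals (which live in `charset_normalizer.api`, `md` and `cd` and also weigh coherence):

```python
from typing import Optional

CANDIDATES = ["utf_8", "cp1252", "latin_1"]  # stand-in for all supported codecs


def mess_score(text: str) -> float:
    # Stand-in "noise" probe: fraction of unprintable characters.
    bad = sum(1 for ch in text if not ch.isprintable() and ch not in "\n\r\t")
    return bad / max(len(text), 1)


def sketch_detect(payload: bytes) -> Optional[str]:
    scored = []
    for encoding in CANDIDATES:
        try:
            text = payload.decode(encoding)  # 1. discard tables that cannot fit
        except (UnicodeDecodeError, LookupError):
            continue
        scored.append((mess_score(text), encoding))  # 2. measure the mess
    # 3. keep the match with the lowest mess (coherence probing omitted here)
    return min(scored)[1] if scored else None


print(sketch_detect("Où est le café ?".encode("cp1252")))
```
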
**Wait a minute**, what are noise/mess and coherence according to **YOU?**

*Noise:* I opened hundreds of text files **written by humans** with the wrong encoding table. **I observed**, then
**I established** some ground rules about **what is obvious** when **it seems like** a mess (i.e., defining noise in rendered text).
I know that my interpretation of noise is probably incomplete; feel free to contribute in order to
improve or rewrite it.

*Coherence:* For each language on Earth, we have computed ranked letter-appearance occurrences (the best we can). So I thought
that intel is worth something here. So I use those records against decoded text to check whether I can detect intelligent design.

## ⚡ Known limitations

- Language detection is unreliable when the text contains two or more languages sharing identical letters (e.g. HTML, with English tags, plus Turkish content, which shares Latin characters).
- Every charset detector depends heavily on having sufficient content. In common cases, do not bother running detection on very tiny content.

## ⚠️ About Python EOLs

**If you are running:**

- Python >=2.7,<3.5: Unsupported
- Python 3.5: charset-normalizer < 2.1
- Python 3.6: charset-normalizer < 3.1
- Python 3.7: charset-normalizer < 4.0

Upgrade your Python interpreter as soon as possible.

## 👤 Contributing

Contributions, issues and feature requests are very much welcome.<br />
Feel free to check the [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.

## 📝 License

Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).<br />
This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.

Character frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/)

## 💼 For Enterprise

Professional support for charset-normalizer is available as part of the [Tidelift
Subscription][1]. Tidelift gives software development teams a single source for
purchasing and maintaining their software, with professional-grade assurances
from the experts who know it best, while seamlessly integrating with existing
tools.

[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme

[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/7297/badge)](https://www.bestpractices.dev/projects/7297)

MIT License
|
||||
|
||||
Copyright (c) 2025 TAHRI Ahmed R.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
Binary file not shown.
@ -1,764 +0,0 @@
Metadata-Version: 2.4
Name: charset-normalizer
Version: 3.4.4
Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.
Author-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
Maintainer-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
License: MIT
Project-URL: Changelog, https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md
Project-URL: Documentation, https://charset-normalizer.readthedocs.io/
Project-URL: Code, https://github.com/jawah/charset_normalizer
Project-URL: Issue tracker, https://github.com/jawah/charset_normalizer/issues
Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Text Processing :: Linguistic
Classifier: Topic :: Utilities
Classifier: Typing :: Typed
Requires-Python: >=3.7
Description-Content-Type: text/markdown
License-File: LICENSE
Provides-Extra: unicode-backport
Dynamic: license-file
<h1 align="center">Charset Detection, for Everyone 👋</h1>

<p align="center">
  <sup>The Real First Universal Charset Detector</sup><br>
  <a href="https://pypi.org/project/charset-normalizer">
    <img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
  </a>
  <a href="https://pepy.tech/project/charset-normalizer/">
    <img alt="Download Count Total" src="https://static.pepy.tech/badge/charset-normalizer/month" />
  </a>
  <a href="https://bestpractices.coreinfrastructure.org/projects/7297">
    <img src="https://bestpractices.coreinfrastructure.org/projects/7297/badge">
  </a>
</p>
<p align="center">
  <sup><i>Featured Packages</i></sup><br>
  <a href="https://github.com/jawah/niquests">
    <img alt="Static Badge" src="https://img.shields.io/badge/Niquests-Most_Advanced_HTTP_Client-cyan">
  </a>
  <a href="https://github.com/jawah/wassima">
    <img alt="Static Badge" src="https://img.shields.io/badge/Wassima-Certifi_Replacement-cyan">
  </a>
</p>
<p align="center">
  <sup><i>In other languages (unofficial ports - by the community)</i></sup><br>
  <a href="https://github.com/nickspring/charset-normalizer-rs">
    <img alt="Static Badge" src="https://img.shields.io/badge/Rust-red">
  </a>
</p>

> A library that helps you read text from an unknown charset encoding.<br /> Motivated by `chardet`,
> I'm trying to resolve the issue by taking a new approach.
> All IANA character set names for which the Python core library provides codecs are supported.

<p align="center">
  >>>>> <a href="https://charsetnormalizerweb.ousret.now.sh" target="_blank">👉 Try Me Online Now, Then Adopt Me 👈 </a> <<<<<
</p>

This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.

| Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |
|--------------------------------------------------|:---:|:---:|:---:|
| `Fast` | ❌ | ✅ | ✅ |
| `Universal**` | ❌ | ✅ | ❌ |
| `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ |
| `Reliable` **with** distinguishable standards | ✅ | ✅ | ✅ |
| `License` | LGPL-2.1<br>_restrictive_ | MIT | MPL-1.1<br>_restrictive_ |
| `Native Python` | ✅ | ✅ | ❌ |
| `Detect spoken language` | ❌ | ✅ | N/A |
| `UnicodeDecodeError Safety` | ❌ | ✅ | ❌ |
| `Whl Size (min)` | 193.6 kB | 42 kB | ~200 kB |
| `Supported Encoding` | 33 | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |

<p align="center">
  <img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>
</p>

*\*\*: They are clearly using specific code for a specific encoding, even if covering most of the used ones*<br>

## ⚡ Performance

This package offers better performance than its counterpart Chardet. Here are some numbers.

| Package | Accuracy | Mean per file (ms) | File per sec (est) |
|-----------------------------------------------|:--------:|:------------------:|:------------------:|
| [chardet](https://github.com/chardet/chardet) | 86 % | 63 ms | 16 file/sec |
| charset-normalizer | **98 %** | **10 ms** | 100 file/sec |

| Package | 99th percentile | 95th percentile | 50th percentile |
|-----------------------------------------------|:---------------:|:---------------:|:---------------:|
| [chardet](https://github.com/chardet/chardet) | 265 ms | 71 ms | 7 ms |
| charset-normalizer | 100 ms | 50 ms | 5 ms |

_updated as of December 2024 using CPython 3.12_

Chardet's performance on larger files (1MB+) is very poor. Expect a huge difference on large payloads.

> Stats are generated using 400+ files with default parameters. For more details on the files used, see the GHA workflows.
> And yes, these results might change at any time. The dataset can be updated to include more files.
> The actual delays depend heavily on your CPU capabilities. The factors should remain the same.
> Keep in mind that the stats are generous and that Chardet's accuracy vs. ours is measured using Chardet's initial capability
> (e.g. supported encodings). Challenge them if you want.

## ✨ Installation

Using pip:

```sh
pip install charset-normalizer -U
```

## 🚀 Basic Usage

### CLI
This package comes with a CLI.

```
usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
                  file [file ...]

The Real First Universal Charset Detector. Discover originating encoding used
on text file. Normalize text to unicode.

positional arguments:
  files                 File(s) to be analysed

optional arguments:
  -h, --help            show this help message and exit
  -v, --verbose         Display complementary information about file if any.
                        Stdout will contain logs about the detection process.
  -a, --with-alternative
                        Output complementary possibilities if any. Top-level
                        JSON WILL be a list.
  -n, --normalize       Permit to normalize input file. If not set, program
                        does not write anything.
  -m, --minimal         Only output the charset detected to STDOUT. Disabling
                        JSON output.
  -r, --replace         Replace file when trying to normalize it instead of
                        creating a new one.
  -f, --force           Replace file without asking if you are sure, use this
                        flag with caution.
  -t THRESHOLD, --threshold THRESHOLD
                        Define a custom maximum amount of chaos allowed in
                        decoded content. 0. <= chaos <= 1.
  --version             Show version information and exit.
```

```bash
normalizer ./data/sample.1.fr.srt
```

or

```bash
python -m charset_normalizer ./data/sample.1.fr.srt
```

🎉 Since version 1.4.0 the CLI produces an easily usable stdout result in JSON format.

```json
{
    "path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
    "encoding": "cp1252",
    "encoding_aliases": [
        "1252",
        "windows_1252"
    ],
    "alternative_encodings": [
        "cp1254",
        "cp1256",
        "cp1258",
        "iso8859_14",
        "iso8859_15",
        "iso8859_16",
        "iso8859_3",
        "iso8859_9",
        "latin_1",
        "mbcs"
    ],
    "language": "French",
    "alphabets": [
        "Basic Latin",
        "Latin-1 Supplement"
    ],
    "has_sig_or_bom": false,
    "chaos": 0.149,
    "coherence": 97.152,
    "unicode_path": null,
    "is_preferred": true
}
```

### Python
*Just print out normalized text*
```python
from charset_normalizer import from_path

results = from_path('./my_subtitle.srt')

print(str(results.best()))
```
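
Beyond printing the decoded text, the returned best match exposes the detection details. A small sketch, assuming the same illustrative file path as above:

```python
from charset_normalizer import from_path

best_guess = from_path('./my_subtitle.srt').best()  # CharsetMatch or None

if best_guess is not None:
    print(best_guess.encoding)    # e.g. "cp1252"
    print(best_guess.language)    # e.g. "French"
    print(best_guess.chaos)       # mess ratio; lower is better
    print(best_guess.coherence)   # language coherence ratio; higher is better
```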

*Upgrade your code without effort*
```python
from charset_normalizer import detect
```

The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.
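
As a quick illustration of that drop-in behavior (a sketch; the sample bytes and printed values are made up):

```python
from charset_normalizer import detect

# Same result shape as chardet: a dict with encoding, language and confidence.
result = detect("Bonjour tout le monde".encode("utf_16"))
print(result)  # e.g. {'encoding': 'UTF-16', 'language': '', 'confidence': 1.0}
```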

See the docs for advanced usage: [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)

## 😇 Why

When I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a
reliable alternative using a completely different method. Also! I never back down from a good challenge!

I **don't care** about the **originating charset** encoding, because **two different tables** can
produce **two identical rendered strings.**
What I want is to get readable text, the best I can.

In a way, **I'm brute-forcing text decoding.** How cool is that? 😎

Don't confuse the package **ftfy** with charset-normalizer or chardet. ftfy's goal is to repair Unicode strings, whereas charset-normalizer converts a raw file in an unknown encoding to Unicode.

## 🍰 How

- Discard all charset encoding tables that could not fit the binary content.
- Measure the noise, or the mess, once opened (by chunks) with a corresponding charset encoding.
- Extract the matches with the lowest mess detected.
- Additionally, we measure coherence / probe for a language (see the sketch after this list).
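
A minimal sketch of that pipeline, using only the standard library; the candidate list is an illustrative shortlist, and the mess scoring is a crude stand-in for the package's real mess detector:

```python
CANDIDATES = ["ascii", "utf_8", "cp1252", "utf_16"]  # illustrative shortlist

def crude_mess(text: str) -> float:
    """Crude stand-in for the mess detector: ratio of unprintable characters."""
    if not text:
        return 0.0
    bad = sum(1 for ch in text if not ch.isprintable() and ch not in "\n\r\t")
    return bad / len(text)

def brute_force_decode(payload: bytes) -> list:
    """Keep only the tables that fit, then rank them by measured mess."""
    scored = []
    for name in CANDIDATES:
        try:
            text = payload.decode(name)  # step 1: discard tables that cannot fit
        except (UnicodeDecodeError, LookupError):
            continue
        scored.append((name, crude_mess(text)))  # step 2: measure the mess
    return sorted(scored, key=lambda pair: pair[1])  # step 3: lowest mess first

print(brute_force_decode("Héllo wörld".encode("cp1252")))
```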

**Wait a minute**, what are noise/mess and coherence according to **YOU?**

*Noise:* I opened hundreds of text files, **written by humans**, with the wrong encoding table. **I observed**, then
**I established** some ground rules about **what is obvious** when **it seems like** a mess (aka. defining noise in rendered text).
I know that my interpretation of what is noise is probably incomplete; feel free to contribute in order to
improve or rewrite it.

*Coherence:* For each language there is on earth, we have computed ranked letter appearance occurrences (the best we can). So I thought
that intel is worth something here. So I use those records against decoded text to check if I can detect intelligent design.
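
To make the coherence idea concrete, here is a toy version of that check; the reference ranking below is a rough sketch for English, not the package's actual frequency tables:

```python
from collections import Counter

# Rough top-of-the-ranking letters for English; illustrative only.
ENGLISH_RANK = ["e", "t", "a", "o", "i", "n", "s", "h", "r", "d", "l", "u"]

def coherence_ratio(text: str, top_n: int = 6) -> float:
    """Share of the text's most frequent letters found in the reference ranking."""
    letters = [ch.lower() for ch in text if ch.isalpha()]
    if not letters:
        return 0.0
    most_common = [ch for ch, _ in Counter(letters).most_common(top_n)]
    hits = sum(1 for ch in most_common if ch in ENGLISH_RANK)
    return hits / len(most_common)

print(coherence_ratio("The quick brown fox jumps over the lazy dog"))  # high
print(coherence_ratio("Ŝžźçñ þðß"))  # gibberish for English: low score
```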

## ⚡ Known limitations

- Language detection is unreliable when text contains two or more languages sharing identical letters. (e.g. HTML (English tags) + Turkish content (sharing Latin characters))
- Every charset detector heavily depends on sufficient content. In common cases, do not bother running detection on very tiny content.

## ⚠️ About Python EOLs

**If you are running:**

- Python >=2.7,<3.5: Unsupported
- Python 3.5: charset-normalizer < 2.1
- Python 3.6: charset-normalizer < 3.1
- Python 3.7: charset-normalizer < 4.0

Upgrade your Python interpreter as soon as possible.

## 👤 Contributing

Contributions, issues and feature requests are very much welcome.<br />
Feel free to check the [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.

## 📝 License

Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).<br />
This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.

Characters frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/)

## 💼 For Enterprise

Professional support for charset-normalizer is available as part of the [Tidelift
Subscription][1]. Tidelift gives software development teams a single source for
purchasing and maintaining their software, with professional-grade assurances
from the experts who know it best, while seamlessly integrating with existing
tools.

[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme

[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/7297/badge)](https://www.bestpractices.dev/projects/7297)
# Changelog
All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## [3.4.4](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.4) (2025-10-13)

### Changed
- Bound `setuptools` to a specific constraint `setuptools>=68,<=81`.
- Raised the upper bound of mypyc for the optional pre-built extension to v1.18.2

### Removed
- `setuptools-scm` as a build dependency.

### Misc
- Enforced hashes in `dev-requirements.txt` and created `ci-requirements.txt` for security purposes.
- Additional pre-built wheels for riscv64, s390x, and armv7l architectures.
- Restore `multiple.intoto.jsonl` in GitHub releases in addition to the individual attestation file per wheel.

## [3.4.3](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.3) (2025-08-09)

### Changed
- mypy(c) is no longer a required dependency at build time if `CHARSET_NORMALIZER_USE_MYPYC` isn't set to `1`. (#595) (#583)
- Automatically lower the confidence on small byte samples that are not Unicode in the legacy `detect` output. (#391)

### Added
- Custom build backend to overcome the inability to mark mypy as an optional dependency in the build phase.
- Support for Python 3.14

### Fixed
- The sdist archive contained useless directories.
- Automatically fall back on valid UTF-16 or UTF-32 even if the MD says it's noisy. (#633)

### Misc
- SBOMs are automatically published to the relevant GitHub release to comply with regulatory changes.
  Each published wheel comes with its SBOM. We chose CycloneDX as the format.
- Prebuilt optimized wheels are no longer distributed by default for CPython 3.7 due to a change in cibuildwheel.

## [3.4.2](https://github.com/Ousret/charset_normalizer/compare/3.4.1...3.4.2) (2025-05-02)

### Fixed
- Addressed the DeprecationWarning in our CLI regarding `argparse.FileType` by backporting the target class into the package. (#591)
- Improved the overall reliability of the detector with CJK Ideographs. (#605) (#587)

### Changed
- Optional mypyc compilation upgraded to version 1.15 for Python >= 3.8

## [3.4.1](https://github.com/Ousret/charset_normalizer/compare/3.4.0...3.4.1) (2024-12-24)

### Changed
- Project metadata are now stored using `pyproject.toml` instead of `setup.cfg`, using setuptools as the build backend.
- Enforce annotation delayed loading for simpler and consistent types in the project.
- Optional mypyc compilation upgraded to version 1.14 for Python >= 3.8

### Added
- pre-commit configuration.
- noxfile.

### Removed
- `build-requirements.txt`, as per using `pyproject.toml` native build configuration.
- `bin/integration.py` and `bin/serve.py` in favor of downstream integration tests (see noxfile).
- `setup.cfg` in favor of `pyproject.toml` metadata configuration.
- Unused `utils.range_scan` function.

### Fixed
- Converting content to Unicode bytes may insert `utf_8` instead of the preferred `utf-8`. (#572)
- Deprecation warning "'count' is passed as positional argument" when converting to Unicode bytes on Python 3.13+

## [3.4.0](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) (2024-10-08)

### Added
- Argument `--no-preemptive` in the CLI to prevent the detector from searching for hints.
- Support for Python 3.13 (#512)

### Fixed
- Relaxed the TypeError exception thrown when trying to compare a CharsetMatch with anything other than a CharsetMatch.
- Improved the general reliability of the detector based on user feedback. (#520) (#509) (#498) (#407) (#537)
- A charset declared in the content (preemptive detection) was not changed when converting to utf-8 bytes. (#381)

## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31)

### Fixed
- Unintentional memory usage regression when using a large payload that matches several encodings (#376)
- Regression on some detection cases showcased in the documentation (#371)

### Added
- Noise (md) probe that identifies malformed Arabic representation due to the presence of letters in isolated form (credit to my wife)

## [3.3.1](https://github.com/Ousret/charset_normalizer/compare/3.3.0...3.3.1) (2023-10-22)

### Changed
- Optional mypyc compilation upgraded to version 1.6.1 for Python >= 3.8
- Improved the general detection reliability based on reports from the community

## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30)

### Added
- Allow executing the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer`
- Support for 9 forgotten encodings that are supported by Python but unlisted in `encoding.aliases` as they have no alias (#323)

### Removed
- (internal) Redundant utils.is_ascii function and unused function is_private_use_only
- (internal) charset_normalizer.assets is moved inside charset_normalizer.constant

### Changed
- (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection
- Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.8

### Fixed
- Unable to properly sort CharsetMatch when both chaos/noise and coherence were close, due to an unreachable condition in \_\_lt\_\_ (#350)

## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)

### Changed
- The type hint for function `from_path` no longer enforces `PathLike` as its first argument
- Minor improvement to the global detection reliability

### Added
- Introduce the function `is_binary`, which relies on the main capabilities and is optimized to detect binaries (see the sketch after this list)
- Propagate the `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp`, allowing deeper control over the detection (default True)
- Explicit support for Python 3.12
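
A quick illustration of that helper; the payloads here are made up, and `is_binary` also accepts a path or a file object:

```python
from charset_normalizer import is_binary

print(is_binary(b"\x00\x5f\x2b\x1f"))                 # True: binary-looking payload
print(is_binary("Bonjour le monde".encode("utf_8")))  # False: plain decodable text
```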

### Fixed
- Edge-case detection failure where a file would contain 'very-long' camel-cased words (Issue #289)

## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)

### Added
- Argument `should_rename_legacy` for the legacy function `detect`, and disregard any new arguments without errors (PR #262)

### Removed
- Support for Python 3.6 (PR #260)

### Changed
- Optional speedup provided by mypy/c 1.0.1

## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)

### Fixed
- The multi-byte cutter/chunk generator did not always cut correctly (PR #233)

### Changed
- Speedup provided by mypy/c 0.990 on Python >= 3.7

## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)

### Added
- Extended the capability of explain=True when cp_isolation contains at most two entries (min. one): it will log the Mess-detector results in detail
- Support for alternative language frequency sets in charset_normalizer.assets.FREQUENCIES
- Parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio (see the sketch after this list)
- `normalizer --version` now specifies whether the current version provides the extra speedup (meaning a mypyc-compiled wheel)
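
For example, a stricter coherence requirement can be expressed like this; the sample text and threshold value are illustrative:

```python
from charset_normalizer import from_bytes

payload = "Ceci est un exemple de texte en français.".encode("cp1252")

# language_threshold raises the minimum coherence ratio a match must reach;
# the default is 0.1.
best_guess = from_bytes(payload, language_threshold=0.3).best()

if best_guess is not None:
    print(best_guess.encoding, best_guess.language)
```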

### Changed
- Build with static metadata using the 'build' frontend
- Make the language detection stricter
- Optional: Module `md.py` can be compiled using mypyc to provide an extra speedup, up to 4x faster than v2.1

### Fixed
- The CLI with the --normalize option failed when using full paths for files
- TooManyAccentuatedPlugin induced false positives in the mess detection when too few alpha characters had been fed to it
- Sphinx warnings when generating the documentation

### Removed
- The coherence detector no longer returns 'Simple English'; it returns 'English' instead
- The coherence detector no longer returns 'Classical Chinese'; it returns 'Chinese' instead
- Breaking: Methods `first()` and `best()` from CharsetMatch
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (it is unreliable and conflicts with ASCII)
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
- Breaking: Top-level function `normalize`
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
- Support for the backport `unicodedata2`

## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)

### Added
- Extended the capability of explain=True when cp_isolation contains at most two entries (min. one): it will log the Mess-detector results in detail
- Support for alternative language frequency sets in charset_normalizer.assets.FREQUENCIES
- Parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio

### Changed
- Build with static metadata using the 'build' frontend
- Make the language detection stricter

### Fixed
- The CLI with the --normalize option failed when using full paths for files
- TooManyAccentuatedPlugin induced false positives in the mess detection when too few alpha characters had been fed to it

### Removed
- The coherence detector no longer returns 'Simple English'; it returns 'English' instead
- The coherence detector no longer returns 'Classical Chinese'; it returns 'Chinese' instead

## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)

### Added
- `normalizer --version` now specifies whether the current version provides the extra speedup (meaning a mypyc-compiled wheel)

### Removed
- Breaking: Methods `first()` and `best()` from CharsetMatch
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (it is unreliable and conflicts with ASCII)

### Fixed
- Sphinx warnings when generating the documentation

## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)

### Changed
- Optional: Module `md.py` can be compiled using mypyc to provide an extra speedup, up to 4x faster than v2.1

### Removed
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
- Breaking: Top-level function `normalize`
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
- Support for the backport `unicodedata2`

## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)

### Deprecated
- Function `normalize` scheduled for removal in 3.0

### Changed
- Removed a useless call to decode in the function is_unprintable (#206)

### Fixed
- Third-party library (i18n xgettext) crashing because utf_8 (PEP 263) with an underscore was not recognized, from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)

## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)

### Added
- Output the Unicode table version when running the CLI with `--version` (PR #194)

### Changed
- Re-use the decoded buffer for single-byte character sets, from [@nijel](https://github.com/nijel) (PR #175)
- Fixed some performance bottlenecks, from [@deedy5](https://github.com/deedy5) (PR #183)

### Fixed
- Workaround for a potential bug in CPython with Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1, not acknowledged as a space (PR #175)
- CLI default threshold aligned with the API threshold, from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)

### Removed
- Support for Python 3.5 (PR #192)

### Deprecated
- Use of the backport unicodedata from `unicodedata2`, as Python is quickly catching up; scheduled for removal in 3.0 (PR #194)

## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)

### Fixed
- ASCII mis-detection in rare cases (PR #170)

## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)

### Added
- Explicit support for Python 3.11 (PR #164)

### Changed
- The logging behavior has been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165)

## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)

### Fixed
- Fallback match entries might lead to UnicodeDecodeError for large byte sequences (PR #154)

### Changed
- Skipping the language detection (CD) on ASCII (PR #155)

## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)

### Changed
- Moderating the logging impact (since 2.0.8) for specific environments (PR #147)

### Fixed
- Wrong logging level applied when setting the kwarg `explain` to True (PR #146)

## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)

### Changed
- Improvement to Vietnamese detection (PR #126)
- MD improvement on trailing data and long foreign (non-pure-Latin) data (PR #124)
- Efficiency improvements in cd/alphabet_languages, from [@adbar](https://github.com/adbar) (PR #122)
- Call sum() without an intermediary list, following PEP 289 recommendations, from [@adbar](https://github.com/adbar) (PR #129)
- Code style as refactored by Sourcery-AI (PR #131)
- Minor adjustment to the MD around European words (PR #133)
- Remove and replace SRTs from assets / tests (PR #139)
- Initialize the library logger with a `NullHandler` by default, from [@nmaynes](https://github.com/nmaynes) (PR #135)
- Setting the kwarg `explain` to True will provisionally add (bounded to the function's lifespan) a specific stream handler (PR #135)

### Fixed
- Fix large (misleading) sequences giving UnicodeDecodeError (PR #137)
- Avoid using chunks that are too insignificant (PR #137)

### Added
- Add and expose the function `set_logging_handler` to configure a specific StreamHandler, from [@nmaynes](https://github.com/nmaynes) (PR #135) (see the sketch after this list)
- Add `CHANGELOG.md` entries; the format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)
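
A small sketch of how that handler hook can be used, assuming the function is importable from the package top level as the entry above describes; the level and sample payload are illustrative:

```python
import logging

from charset_normalizer import from_bytes, set_logging_handler

# Route the library's detection logs through a stream handler at DEBUG level.
set_logging_handler(level=logging.DEBUG)

from_bytes("Un exemple de texte.".encode("utf_8")).best()
```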

## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)

### Added
- Add support for Kazakh (Cyrillic) language detection (PR #109)

### Changed
- Further improved inferring the language from a given single-byte code page (PR #112)
- Vainly trying to leverage PEP 263 when PEP 3120 is not supported (PR #116)
- Refactoring for potential performance improvements in loops, from [@adbar](https://github.com/adbar) (PR #113)
- Various detection improvements (MD+CD) (PR #117)

### Removed
- Removed a redundant logging entry about detected language(s) (PR #115)

### Fixed
- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)

## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)

### Fixed
- Unforeseen regression with the loss of backward compatibility with some older minor versions of Python 3.5.x (PR #100)
- Fix CLI crash when using --minimal output in certain cases (PR #103)

### Changed
- Minor improvement to the detection efficiency (less than 1%) (PR #106 #101)
Binary file not shown.
|
|
@ -1,764 +0,0 @@
|
|||
Metadata-Version: 2.4
|
||||
Name: charset-normalizer
|
||||
Version: 3.4.4
|
||||
Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.
|
||||
Author-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
|
||||
Maintainer-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
|
||||
License: MIT
|
||||
Project-URL: Changelog, https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md
|
||||
Project-URL: Documentation, https://charset-normalizer.readthedocs.io/
|
||||
Project-URL: Code, https://github.com/jawah/charset_normalizer
|
||||
Project-URL: Issue tracker, https://github.com/jawah/charset_normalizer/issues
|
||||
Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect
|
||||
Classifier: Development Status :: 5 - Production/Stable
|
||||
Classifier: Intended Audience :: Developers
|
||||
Classifier: Operating System :: OS Independent
|
||||
Classifier: Programming Language :: Python
|
||||
Classifier: Programming Language :: Python :: 3
|
||||
Classifier: Programming Language :: Python :: 3.7
|
||||
Classifier: Programming Language :: Python :: 3.8
|
||||
Classifier: Programming Language :: Python :: 3.9
|
||||
Classifier: Programming Language :: Python :: 3.10
|
||||
Classifier: Programming Language :: Python :: 3.11
|
||||
Classifier: Programming Language :: Python :: 3.12
|
||||
Classifier: Programming Language :: Python :: 3.13
|
||||
Classifier: Programming Language :: Python :: 3.14
|
||||
Classifier: Programming Language :: Python :: 3 :: Only
|
||||
Classifier: Programming Language :: Python :: Implementation :: CPython
|
||||
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
||||
Classifier: Topic :: Text Processing :: Linguistic
|
||||
Classifier: Topic :: Utilities
|
||||
Classifier: Typing :: Typed
|
||||
Requires-Python: >=3.7
|
||||
Description-Content-Type: text/markdown
|
||||
License-File: LICENSE
|
||||
Provides-Extra: unicode-backport
|
||||
Dynamic: license-file
|
||||
|
||||
<h1 align="center">Charset Detection, for Everyone 👋</h1>
|
||||
|
||||
<p align="center">
|
||||
<sup>The Real First Universal Charset Detector</sup><br>
|
||||
<a href="https://pypi.org/project/charset-normalizer">
|
||||
<img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
|
||||
</a>
|
||||
<a href="https://pepy.tech/project/charset-normalizer/">
|
||||
<img alt="Download Count Total" src="https://static.pepy.tech/badge/charset-normalizer/month" />
|
||||
</a>
|
||||
<a href="https://bestpractices.coreinfrastructure.org/projects/7297">
|
||||
<img src="https://bestpractices.coreinfrastructure.org/projects/7297/badge">
|
||||
</a>
|
||||
</p>
|
||||
<p align="center">
|
||||
<sup><i>Featured Packages</i></sup><br>
|
||||
<a href="https://github.com/jawah/niquests">
|
||||
<img alt="Static Badge" src="https://img.shields.io/badge/Niquests-Most_Advanced_HTTP_Client-cyan">
|
||||
</a>
|
||||
<a href="https://github.com/jawah/wassima">
|
||||
<img alt="Static Badge" src="https://img.shields.io/badge/Wassima-Certifi_Replacement-cyan">
|
||||
</a>
|
||||
</p>
|
||||
<p align="center">
|
||||
<sup><i>In other language (unofficial port - by the community)</i></sup><br>
|
||||
<a href="https://github.com/nickspring/charset-normalizer-rs">
|
||||
<img alt="Static Badge" src="https://img.shields.io/badge/Rust-red">
|
||||
</a>
|
||||
</p>
|
||||
|
||||
> A library that helps you read text from an unknown charset encoding.<br /> Motivated by `chardet`,
|
||||
> I'm trying to resolve the issue by taking a new approach.
|
||||
> All IANA character set names for which the Python core library provides codecs are supported.
|
||||
|
||||
<p align="center">
|
||||
>>>>> <a href="https://charsetnormalizerweb.ousret.now.sh" target="_blank">👉 Try Me Online Now, Then Adopt Me 👈 </a> <<<<<
|
||||
</p>
|
||||
|
||||
This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.
|
||||
|
||||
| Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |
|
||||
|--------------------------------------------------|:---------------------------------------------:|:--------------------------------------------------------------------------------------------------:|:-----------------------------------------------:|
|
||||
| `Fast` | ❌ | ✅ | ✅ |
|
||||
| `Universal**` | ❌ | ✅ | ❌ |
|
||||
| `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ |
|
||||
| `Reliable` **with** distinguishable standards | ✅ | ✅ | ✅ |
|
||||
| `License` | LGPL-2.1<br>_restrictive_ | MIT | MPL-1.1<br>_restrictive_ |
|
||||
| `Native Python` | ✅ | ✅ | ❌ |
|
||||
| `Detect spoken language` | ❌ | ✅ | N/A |
|
||||
| `UnicodeDecodeError Safety` | ❌ | ✅ | ❌ |
|
||||
| `Whl Size (min)` | 193.6 kB | 42 kB | ~200 kB |
|
||||
| `Supported Encoding` | 33 | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |
|
||||
|
||||
<p align="center">
|
||||
<img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>
|
||||
</p>
|
||||
|
||||
*\*\* : They are clearly using specific code for a specific encoding even if covering most of used one*<br>
|
||||
|
||||
## ⚡ Performance
|
||||
|
||||
This package offer better performance than its counterpart Chardet. Here are some numbers.
|
||||
|
||||
| Package | Accuracy | Mean per file (ms) | File per sec (est) |
|
||||
|-----------------------------------------------|:--------:|:------------------:|:------------------:|
|
||||
| [chardet](https://github.com/chardet/chardet) | 86 % | 63 ms | 16 file/sec |
|
||||
| charset-normalizer | **98 %** | **10 ms** | 100 file/sec |
|
||||
|
||||
| Package | 99th percentile | 95th percentile | 50th percentile |
|
||||
|-----------------------------------------------|:---------------:|:---------------:|:---------------:|
|
||||
| [chardet](https://github.com/chardet/chardet) | 265 ms | 71 ms | 7 ms |
|
||||
| charset-normalizer | 100 ms | 50 ms | 5 ms |
|
||||
|
||||
_updated as of december 2024 using CPython 3.12_
|
||||
|
||||
Chardet's performance on larger file (1MB+) are very poor. Expect huge difference on large payload.
|
||||
|
||||
> Stats are generated using 400+ files using default parameters. More details on used files, see GHA workflows.
|
||||
> And yes, these results might change at any time. The dataset can be updated to include more files.
|
||||
> The actual delays heavily depends on your CPU capabilities. The factors should remain the same.
|
||||
> Keep in mind that the stats are generous and that Chardet accuracy vs our is measured using Chardet initial capability
|
||||
> (e.g. Supported Encoding) Challenge-them if you want.
|
||||
|
||||
## ✨ Installation
|
||||
|
||||
Using pip:
|
||||
|
||||
```sh
|
||||
pip install charset-normalizer -U
|
||||
```
|
||||
|
||||
## 🚀 Basic Usage
|
||||
|
||||
### CLI
|
||||
This package comes with a CLI.
|
||||
|
||||
```
|
||||
usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
|
||||
file [file ...]
|
||||
|
||||
The Real First Universal Charset Detector. Discover originating encoding used
|
||||
on text file. Normalize text to unicode.
|
||||
|
||||
positional arguments:
|
||||
files File(s) to be analysed
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
-v, --verbose Display complementary information about file if any.
|
||||
Stdout will contain logs about the detection process.
|
||||
-a, --with-alternative
|
||||
Output complementary possibilities if any. Top-level
|
||||
JSON WILL be a list.
|
||||
-n, --normalize Permit to normalize input file. If not set, program
|
||||
does not write anything.
|
||||
-m, --minimal Only output the charset detected to STDOUT. Disabling
|
||||
JSON output.
|
||||
-r, --replace Replace file when trying to normalize it instead of
|
||||
creating a new one.
|
||||
-f, --force Replace file without asking if you are sure, use this
|
||||
flag with caution.
|
||||
-t THRESHOLD, --threshold THRESHOLD
|
||||
Define a custom maximum amount of chaos allowed in
|
||||
decoded content. 0. <= chaos <= 1.
|
||||
--version Show version information and exit.
|
||||
```
|
||||
|
||||
```bash
|
||||
normalizer ./data/sample.1.fr.srt
|
||||
```
|
||||
|
||||
or
|
||||
|
||||
```bash
|
||||
python -m charset_normalizer ./data/sample.1.fr.srt
|
||||
```
|
||||
|
||||
🎉 Since version 1.4.0 the CLI produce easily usable stdout result in JSON format.
|
||||
|
||||
```json
|
||||
{
|
||||
"path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
|
||||
"encoding": "cp1252",
|
||||
"encoding_aliases": [
|
||||
"1252",
|
||||
"windows_1252"
|
||||
],
|
||||
"alternative_encodings": [
|
||||
"cp1254",
|
||||
"cp1256",
|
||||
"cp1258",
|
||||
"iso8859_14",
|
||||
"iso8859_15",
|
||||
"iso8859_16",
|
||||
"iso8859_3",
|
||||
"iso8859_9",
|
||||
"latin_1",
|
||||
"mbcs"
|
||||
],
|
||||
"language": "French",
|
||||
"alphabets": [
|
||||
"Basic Latin",
|
||||
"Latin-1 Supplement"
|
||||
],
|
||||
"has_sig_or_bom": false,
|
||||
"chaos": 0.149,
|
||||
"coherence": 97.152,
|
||||
"unicode_path": null,
|
||||
"is_preferred": true
|
||||
}
|
||||
```
|
||||
|
||||
### Python
|
||||
*Just print out normalized text*
|
||||
```python
|
||||
from charset_normalizer import from_path
|
||||
|
||||
results = from_path('./my_subtitle.srt')
|
||||
|
||||
print(str(results.best()))
|
||||
```
|
||||
|
||||
*Upgrade your code without effort*
|
||||
```python
|
||||
from charset_normalizer import detect
|
||||
```
|
||||
|
||||
The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.
|
||||
|
||||
See the docs for advanced usage : [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)
|
||||
|
||||
## 😇 Why
|
||||
|
||||
When I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a
|
||||
reliable alternative using a completely different method. Also! I never back down on a good challenge!
|
||||
|
||||
I **don't care** about the **originating charset** encoding, because **two different tables** can
|
||||
produce **two identical rendered string.**
|
||||
What I want is to get readable text, the best I can.
|
||||
|
||||
In a way, **I'm brute forcing text decoding.** How cool is that ? 😎
|
||||
|
||||
Don't confuse package **ftfy** with charset-normalizer or chardet. ftfy goal is to repair Unicode string whereas charset-normalizer to convert raw file in unknown encoding to unicode.
|
||||
|
||||
## 🍰 How
|
||||
|
||||
- Discard all charset encoding table that could not fit the binary content.
|
||||
- Measure noise, or the mess once opened (by chunks) with a corresponding charset encoding.
|
||||
- Extract matches with the lowest mess detected.
|
||||
- Additionally, we measure coherence / probe for a language.
|
||||
|
||||
**Wait a minute**, what is noise/mess and coherence according to **YOU ?**
|
||||
|
||||
*Noise :* I opened hundred of text files, **written by humans**, with the wrong encoding table. **I observed**, then
|
||||
**I established** some ground rules about **what is obvious** when **it seems like** a mess (aka. defining noise in rendered text).
|
||||
I know that my interpretation of what is noise is probably incomplete, feel free to contribute in order to
|
||||
improve or rewrite it.
|
||||
|
||||
*Coherence :* For each language there is on earth, we have computed ranked letter appearance occurrences (the best we can). So I thought
|
||||
that intel is worth something here. So I use those records against decoded text to check if I can detect intelligent design.
|
||||
|
||||
## ⚡ Known limitations
|
||||
|
||||
- Language detection is unreliable when text contains two or more languages sharing identical letters. (eg. HTML (english tags) + Turkish content (Sharing Latin characters))
|
||||
- Every charset detector heavily depends on sufficient content. In common cases, do not bother run detection on very tiny content.
|
||||
|
||||
## ⚠️ About Python EOLs
|
||||
|
||||
**If you are running:**
|
||||
|
||||
- Python >=2.7,<3.5: Unsupported
|
||||
- Python 3.5: charset-normalizer < 2.1
|
||||
- Python 3.6: charset-normalizer < 3.1
|
||||
- Python 3.7: charset-normalizer < 4.0
|
||||
|
||||
Upgrade your Python interpreter as soon as possible.
|
||||
|
||||
## 👤 Contributing

Contributions, issues and feature requests are very much welcome.<br />
Feel free to check the [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.

## 📝 License

Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).<br />
This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.

Character frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/)

## 💼 For Enterprise

Professional support for charset-normalizer is available as part of the [Tidelift
Subscription][1]. Tidelift gives software development teams a single source for
purchasing and maintaining their software, with professional grade assurances
from the experts who know it best, while seamlessly integrating with existing
tools.

[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme

[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/7297/badge)](https://www.bestpractices.dev/projects/7297)

# Changelog
All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## [3.4.4](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.4) (2025-10-13)

### Changed
- Bound `setuptools` to a specific constraint `setuptools>=68,<=81`.
- Raised the upper bound of mypyc for the optional pre-built extension to v1.18.2

### Removed
- `setuptools-scm` as a build dependency.

### Misc
- Enforced hashes in `dev-requirements.txt` and created `ci-requirements.txt` for security purposes.
- Additional pre-built wheels for riscv64, s390x, and armv7l architectures.
- Restored `multiple.intoto.jsonl` in GitHub releases in addition to an individual attestation file per wheel.

## [3.4.3](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.3) (2025-08-09)

### Changed
- mypy(c) is no longer a required dependency at build time if `CHARSET_NORMALIZER_USE_MYPYC` isn't set to `1`. (#595) (#583)
- Automatically lower the confidence on small byte samples that are not Unicode in the legacy `detect` output. (#391)

### Added
- Custom build backend to overcome the inability to mark mypy as an optional dependency in the build phase.
- Support for Python 3.14

### Fixed
- The sdist archive contained useless directories.
- Automatically fall back on valid UTF-16 or UTF-32 even if the MD says it's noisy. (#633)

### Misc
- SBOMs are automatically published to the relevant GitHub release to comply with regulatory changes.
  Each published wheel comes with its SBOM. We chose CycloneDX as the format.
- Prebuilt optimized wheels are no longer distributed by default for CPython 3.7 due to a change in cibuildwheel.

## [3.4.2](https://github.com/Ousret/charset_normalizer/compare/3.4.1...3.4.2) (2025-05-02)

### Fixed
- Addressed the DeprecationWarning in our CLI regarding `argparse.FileType` by backporting the target class into the package. (#591)
- Improved the overall reliability of the detector with CJK Ideographs. (#605) (#587)

### Changed
- Optional mypyc compilation upgraded to version 1.15 for Python >= 3.8

## [3.4.1](https://github.com/Ousret/charset_normalizer/compare/3.4.0...3.4.1) (2024-12-24)

### Changed
- Project metadata is now stored using `pyproject.toml` instead of `setup.cfg`, with setuptools as the build backend.
- Enforced delayed annotation loading for simpler and more consistent types in the project.
- Optional mypyc compilation upgraded to version 1.14 for Python >= 3.8

### Added
- pre-commit configuration.
- noxfile.

### Removed
- `build-requirements.txt`, now that the `pyproject.toml` native build configuration is used.
- `bin/integration.py` and `bin/serve.py` in favor of downstream integration tests (see noxfile).
- `setup.cfg` in favor of `pyproject.toml` metadata configuration.
- Unused `utils.range_scan` function.

### Fixed
- Converting content to Unicode bytes may insert `utf_8` instead of the preferred `utf-8`. (#572)
- Deprecation warning "'count' is passed as positional argument" when converting to Unicode bytes on Python 3.13+

## [3.4.0](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) (2024-10-08)

### Added
- Argument `--no-preemptive` in the CLI to prevent the detector from searching for hints.
- Support for Python 3.13 (#512)

### Fixed
- Relaxed the TypeError exception thrown when trying to compare a CharsetMatch with anything other than a CharsetMatch.
- Improved the general reliability of the detector based on user feedback. (#520) (#509) (#498) (#407) (#537)
- Declared charset in content (preemptive detection) was not changed when converting to utf-8 bytes. (#381)

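For reference, the new flag is passed like any other CLI switch (the file path below is illustrative):

```sh
normalizer --no-preemptive ./data/sample.1.fr.srt
```
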
## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31)

### Fixed
- Unintentional memory usage regression when using large payloads that match several encodings (#376)
- Regression in some detection cases showcased in the documentation (#371)

### Added
- Noise (md) probe that identifies malformed Arabic representation due to the presence of letters in isolated form (credit to my wife)

## [3.3.1](https://github.com/Ousret/charset_normalizer/compare/3.3.0...3.3.1) (2023-10-22)

### Changed
- Optional mypyc compilation upgraded to version 1.6.1 for Python >= 3.8
- Improved the general detection reliability based on reports from the community

## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30)

### Added
- Allow executing the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer`
- Support for 9 forgotten encodings that are supported by Python but unlisted in `encoding.aliases` as they have no alias (#323)

### Removed
- (internal) Redundant utils.is_ascii function and unused function is_private_use_only
- (internal) charset_normalizer.assets is moved inside charset_normalizer.constant

### Changed
- (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection
- Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.8

### Fixed
- Unable to properly sort CharsetMatch when both chaos/noise and coherence were close, due to an unreachable condition in \_\_lt\_\_ (#350)

## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)

### Changed
- The type hint for function `from_path` no longer enforces `PathLike` as its first argument
- Minor improvement over the global detection reliability

### Added
- Introduced function `is_binary`, which relies on the main capabilities and is optimized to detect binaries
- Propagated the `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp`, allowing deeper control over the detection (default True)
- Explicit support for Python 3.12

### Fixed
- Edge case detection failure where a file would contain a 'very-long' camel-cased word (Issue #289)

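As a quick illustration of the `is_binary` helper introduced above (the payloads are illustrative):

```python
from charset_normalizer import is_binary

print(is_binary(b"\x00\x01\xff\xfeopaque blob"))  # likely True: binary-looking bytes
print(is_binary(b"plain readable text"))          # likely False: decodable text
```
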
## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)

### Added
- Argument `should_rename_legacy` for the legacy function `detect`; any new, unknown arguments are now disregarded without errors (PR #262)

### Removed
- Support for Python 3.6 (PR #260)

### Changed
- Optional speedup provided by mypy/c 1.0.1

## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)

### Fixed
- The multi-byte cutter/chunk generator did not always cut correctly (PR #233)

### Changed
- Speedup provided by mypy/c 0.990 on Python >= 3.7

## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)

### Added
- Extended the capability of explain=True: when cp_isolation contains at most two entries (min. one), the Mess-detector results are logged in detail
- Support for alternative language frequency sets in charset_normalizer.assets.FREQUENCIES
- Added parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
- `normalizer --version` now specifies whether the current version provides extra speedup (meaning a mypyc-compiled wheel)

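For example, a sketch using the parameter named above (the payload and threshold value are illustrative):

```python
from charset_normalizer import from_bytes

payload = "Bonjour, où êtes-vous ?".encode("cp1252")
# Require a higher minimum coherence before accepting a language match.
results = from_bytes(payload, language_threshold=0.2)
print(results.best())
```
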
### Changed
- Built with static metadata using the 'build' frontend
- Made the language detection stricter
- Optional: module `md.py` can be compiled using mypyc to provide an extra speedup, up to 4x faster than v2.1

### Fixed
- CLI with option --normalize failed when using a full path for files
- TooManyAccentuatedPlugin induced false positives on the mess detection when too few alpha characters were fed to it
- Sphinx warnings when generating the documentation

### Removed
- The coherence detector no longer returns 'Simple English'; it returns 'English' instead
- The coherence detector no longer returns 'Classical Chinese'; it returns 'Chinese' instead
- Breaking: methods `first()` and `best()` from CharsetMatch
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (it is unreliable and conflicts with ASCII)
- Breaking: class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
- Breaking: top-level function `normalize`
- Breaking: properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
- Support for the backport `unicodedata2`

## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)

### Added
- Extended the capability of explain=True: when cp_isolation contains at most two entries (min. one), the Mess-detector results are logged in detail
- Support for alternative language frequency sets in charset_normalizer.assets.FREQUENCIES
- Added parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio

### Changed
- Built with static metadata using the 'build' frontend
- Made the language detection stricter

### Fixed
- CLI with option --normalize failed when using a full path for files
- TooManyAccentuatedPlugin induced false positives on the mess detection when too few alpha characters were fed to it

### Removed
- The coherence detector no longer returns 'Simple English'; it returns 'English' instead
- The coherence detector no longer returns 'Classical Chinese'; it returns 'Chinese' instead

## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)

### Added
- `normalizer --version` now specifies whether the current version provides extra speedup (meaning a mypyc-compiled wheel)

### Removed
- Breaking: methods `first()` and `best()` from CharsetMatch
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (it is unreliable and conflicts with ASCII)

### Fixed
- Sphinx warnings when generating the documentation

## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)

### Changed
- Optional: module `md.py` can be compiled using mypyc to provide an extra speedup, up to 4x faster than v2.1

### Removed
- Breaking: class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
- Breaking: top-level function `normalize`
- Breaking: properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
- Support for the backport `unicodedata2`

## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)

### Deprecated
- Function `normalize` scheduled for removal in 3.0

### Changed
- Removed a useless call to decode in fn is_unprintable (#206)

### Fixed
- Third-party library (i18n xgettext) crashing by not recognizing utf_8 (PEP 263) with an underscore, from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)

## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)

### Added
- Output the Unicode table version when running the CLI with `--version` (PR #194)

### Changed
- Re-use the decoded buffer for single-byte character sets, from [@nijel](https://github.com/nijel) (PR #175)
- Fixed some performance bottlenecks, from [@deedy5](https://github.com/deedy5) (PR #183)

### Fixed
- Worked around a potential bug in CPython: Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1, not acknowledged as a space (PR #175)
- CLI default threshold aligned with the API threshold, from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)

### Removed
- Support for Python 3.5 (PR #192)

### Deprecated
- Use of the backported unicodedata from `unicodedata2`, as Python is quickly catching up; scheduled for removal in 3.0 (PR #194)

## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)

### Fixed
- ASCII mis-detection in rare cases (PR #170)

## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)

### Added
- Explicit support for Python 3.11 (PR #164)

### Changed
- The logging behavior has been completely reviewed; now using only TRACE and DEBUG levels (PR #163 #165)

## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)

### Fixed
- Fallback match entries might lead to UnicodeDecodeError for large byte sequences (PR #154)

### Changed
- Skip the language detection (CD) on ASCII (PR #155)

## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)

### Changed
- Moderating the logging impact (since 2.0.8) for specific environments (PR #147)

### Fixed
- Wrong logging level applied when setting kwarg `explain` to True (PR #146)

## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)
### Changed
- Improvement over Vietnamese detection (PR #126)
- MD improvement on trailing data and long foreign (non-pure Latin) data (PR #124)
- Efficiency improvements in cd/alphabet_languages, from [@adbar](https://github.com/adbar) (PR #122)
- Call sum() without an intermediary list, following PEP 289 recommendations, from [@adbar](https://github.com/adbar) (PR #129)
- Code style as refactored by Sourcery-AI (PR #131)
- Minor adjustment of the MD around European words (PR #133)
- Removed and replaced SRTs from assets / tests (PR #139)
- Initialize the library logger with a `NullHandler` by default, from [@nmaynes](https://github.com/nmaynes) (PR #135)
- Setting kwarg `explain` to True will provisionally add (bound to the function lifespan) a specific stream handler (PR #135)

### Fixed
- Fixed large (misleading) sequences giving UnicodeDecodeError (PR #137)
- Avoid using too-insignificant chunks (PR #137)

### Added
- Added and exposed function `set_logging_handler` to configure a specific StreamHandler, from [@nmaynes](https://github.com/nmaynes) (PR #135)
- Added `CHANGELOG.md` entries; the format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)

## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)
### Added
- Added support for Kazakh (Cyrillic) language detection (PR #109)

### Changed
- Further improved inferring the language from a given single-byte code page (PR #112)
- Vainly trying to leverage PEP 263 when PEP 3120 is not supported (PR #116)
- Refactoring for potential performance improvements in loops, from [@adbar](https://github.com/adbar) (PR #113)
- Various detection improvements (MD+CD) (PR #117)

### Removed
- Removed a redundant logging entry about detected language(s) (PR #115)

### Fixed
- Fixed a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)

## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)
### Fixed
- Unforeseen regression with the loss of backward compatibility with some older minor versions of Python 3.5.x (PR #100)
- Fixed a CLI crash when using --minimal output in certain cases (PR #103)

### Changed
- Minor improvement to the detection efficiency (less than 1%) (PR #106 #101)

## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)
### Changed
- The project now complies with: flake8, mypy, isort and black to ensure better overall quality (PR #81)
- BC support with v1.x was improved; the old staticmethods are restored (PR #82)
- The Unicode detection is slightly improved (PR #93)
- Added syntax sugar \_\_bool\_\_ for the results CharsetMatches list-container (PR #91)

### Removed
- The project no longer raises a warning on tiny content given for detection; it is simply logged as a warning instead (PR #92)

### Fixed
- In some rare cases, the chunk extractor could cut in the middle of a multi-byte character and mislead the mess detection (PR #95)
- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)
- The MANIFEST.in was not exhaustive (PR #78)

## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)
### Fixed
- The CLI no longer raises an unexpected exception when no encoding has been found (PR #70)
- Fixed accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)
- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)
- Submatch factoring could be wrong in rare edge cases (PR #72)
- Multiple files given to the CLI were ignored (after the first path) when publishing results to STDOUT (PR #72)
- Fixed line endings from CRLF to LF for certain project files (PR #67)

### Changed
- Adjusted the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)
- Allow fallback on a specified encoding if any (PR #71)

## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)
### Changed
- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63)
- In accordance with the community's wishes, the detection will fall back on ASCII or UTF-8 as a last resort. (PR #64)

## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)
### Fixed
- Fixed mis-detection of empty/too-small JSON payloads. Report from [@tseaver](https://github.com/tseaver) (PR #59)

### Changed
- Don't inject unicodedata2 into sys.modules, from [@akx](https://github.com/akx) (PR #57)

## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)
### Fixed
- Made it work where there isn't a filesystem available by dropping assets frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)
- Using explain=False permanently disabled the verbose output in the current runtime (PR #47)
- One log entry (language target preemptive) was not shown in logs when using explain=True (PR #47)
- Fixed an undesired exception (ValueError) on getitem of a CharsetMatches instance (PR #52)

### Changed
- The public function normalize's default argument values were not aligned with from_bytes (PR #53)

### Added
- You may now use charset aliases in the cp_isolation and cp_exclusion arguments (PR #47)

## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)
### Changed
- 4x to 5 times faster than the previous 1.4.0 release. At least 2x faster than Chardet.
- Emphasis has been put on UTF-8 detection; it should perform near-instantaneously.
- The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible.
- The detection mechanism has been slightly improved; Turkish content is now detected correctly (most of the time)
- The program has been rewritten to ease readability and maintainability (using static typing).
- utf_7 detection has been reinstated.

### Removed
- This package no longer requires anything when used with Python 3.5 (dropped cached_property)
- Removed support for these languages: Catalan, Esperanto, Kazakh, Basque, Volapük, Azeri, Galician, Nynorsk, Macedonian, and Serbocroatian.
- The exception hook on UnicodeDecodeError has been removed.

### Deprecated
- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0

### Fixed
- The CLI output used the relative path of the file(s). It should be absolute.

## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)
### Fixed
- Logger configuration/usage no longer conflicts with others (PR #44)

## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)
### Removed
- Using standard logging instead of the loguru package.
- Dropped the nose test framework in favor of the maintained pytest.
- Chose not to use the dragonmapper package to help with gibberish Chinese/CJK text.
- Require cached_property only for Python 3.5 due to a constraint; dropped for every other interpreter version.
- Stopped supporting UTF-7 that does not contain a SIG.
- Dropped PrettyTable; replaced with pure JSON output in the CLI.

### Fixed
- The BOM marker in a CharsetNormalizerMatch instance could be False in rare cases, even if obviously present, due to the sub-match factoring process.
- Not searching properly for the BOM when trying the utf32/16 parent codec.

### Changed
- Improved the package's final size by compressing frequencies.json.
- Huge improvement over the largest payloads.

### Added
- The CLI now produces JSON-consumable output.
- Return ASCII if the given sequences fit, given reasonable confidence.

## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)

### Fixed
- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40)

## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)

### Fixed
- An empty payload given for detection may cause an exception when trying to access the `alphabets` property. (PR #39)

## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)

### Fixed
- The legacy detect function should return UTF-8-SIG if a SIG is present in the payload. (PR #38)

## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)

### Changed
- Amended the previous release to allow prettytable 2.0 (PR #35)

## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)

### Fixed
- Fixed an error when using the package with a Python pre-release interpreter (PR #33)

### Changed
- Dependencies refactoring, constraints revised.

### Added
- Added Python 3.9 and 3.10 to the supported interpreters

MIT License

Copyright (c) 2025 TAHRI Ahmed R.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

Binary file not shown.

@ -1,764 +0,0 @@

Metadata-Version: 2.4
Name: charset-normalizer
Version: 3.4.4
Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.
Author-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
Maintainer-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
License: MIT
Project-URL: Changelog, https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md
Project-URL: Documentation, https://charset-normalizer.readthedocs.io/
Project-URL: Code, https://github.com/jawah/charset_normalizer
Project-URL: Issue tracker, https://github.com/jawah/charset_normalizer/issues
Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Text Processing :: Linguistic
Classifier: Topic :: Utilities
Classifier: Typing :: Typed
Requires-Python: >=3.7
Description-Content-Type: text/markdown
License-File: LICENSE
Provides-Extra: unicode-backport
Dynamic: license-file
|
||||
<h1 align="center">Charset Detection, for Everyone 👋</h1>
|
||||
|
||||
<p align="center">
|
||||
<sup>The Real First Universal Charset Detector</sup><br>
|
||||
<a href="https://pypi.org/project/charset-normalizer">
|
||||
<img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
|
||||
</a>
|
||||
<a href="https://pepy.tech/project/charset-normalizer/">
|
||||
<img alt="Download Count Total" src="https://static.pepy.tech/badge/charset-normalizer/month" />
|
||||
</a>
|
||||
<a href="https://bestpractices.coreinfrastructure.org/projects/7297">
|
||||
<img src="https://bestpractices.coreinfrastructure.org/projects/7297/badge">
|
||||
</a>
|
||||
</p>
|
||||
<p align="center">
|
||||
<sup><i>Featured Packages</i></sup><br>
|
||||
<a href="https://github.com/jawah/niquests">
|
||||
<img alt="Static Badge" src="https://img.shields.io/badge/Niquests-Most_Advanced_HTTP_Client-cyan">
|
||||
</a>
|
||||
<a href="https://github.com/jawah/wassima">
|
||||
<img alt="Static Badge" src="https://img.shields.io/badge/Wassima-Certifi_Replacement-cyan">
|
||||
</a>
|
||||
</p>
|
||||
<p align="center">
|
||||
<sup><i>In other language (unofficial port - by the community)</i></sup><br>
|
||||
<a href="https://github.com/nickspring/charset-normalizer-rs">
|
||||
<img alt="Static Badge" src="https://img.shields.io/badge/Rust-red">
|
||||
</a>
|
||||
</p>
|
||||
|
||||
> A library that helps you read text from an unknown charset encoding.<br /> Motivated by `chardet`,
|
||||
> I'm trying to resolve the issue by taking a new approach.
|
||||
> All IANA character set names for which the Python core library provides codecs are supported.
|
||||
|
||||
<p align="center">
|
||||
>>>>> <a href="https://charsetnormalizerweb.ousret.now.sh" target="_blank">👉 Try Me Online Now, Then Adopt Me 👈 </a> <<<<<
|
||||
</p>
|
||||
|
||||
This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.
|
||||
|
||||
| Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |
|
||||
|--------------------------------------------------|:---------------------------------------------:|:--------------------------------------------------------------------------------------------------:|:-----------------------------------------------:|
|
||||
| `Fast` | ❌ | ✅ | ✅ |
|
||||
| `Universal**` | ❌ | ✅ | ❌ |
|
||||
| `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ |
|
||||
| `Reliable` **with** distinguishable standards | ✅ | ✅ | ✅ |
|
||||
| `License` | LGPL-2.1<br>_restrictive_ | MIT | MPL-1.1<br>_restrictive_ |
|
||||
| `Native Python` | ✅ | ✅ | ❌ |
|
||||
| `Detect spoken language` | ❌ | ✅ | N/A |
|
||||
| `UnicodeDecodeError Safety` | ❌ | ✅ | ❌ |
|
||||
| `Whl Size (min)` | 193.6 kB | 42 kB | ~200 kB |
|
||||
| `Supported Encoding` | 33 | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |
|
||||
|
||||
<p align="center">
|
||||
<img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>
|
||||
</p>
|
||||
|
||||
*\*\* : They are clearly using specific code for a specific encoding even if covering most of used one*<br>
|
||||
|
||||
## ⚡ Performance
|
||||
|
||||
This package offer better performance than its counterpart Chardet. Here are some numbers.
|
||||
|
||||
| Package | Accuracy | Mean per file (ms) | File per sec (est) |
|
||||
|-----------------------------------------------|:--------:|:------------------:|:------------------:|
|
||||
| [chardet](https://github.com/chardet/chardet) | 86 % | 63 ms | 16 file/sec |
|
||||
| charset-normalizer | **98 %** | **10 ms** | 100 file/sec |
|
||||
|
||||
| Package | 99th percentile | 95th percentile | 50th percentile |
|
||||
|-----------------------------------------------|:---------------:|:---------------:|:---------------:|
|
||||
| [chardet](https://github.com/chardet/chardet) | 265 ms | 71 ms | 7 ms |
|
||||
| charset-normalizer | 100 ms | 50 ms | 5 ms |
|
||||
|
||||
_updated as of december 2024 using CPython 3.12_
|
||||
|
||||
Chardet's performance on larger file (1MB+) are very poor. Expect huge difference on large payload.
|
||||
|
||||
> Stats are generated using 400+ files using default parameters. More details on used files, see GHA workflows.
|
||||
> And yes, these results might change at any time. The dataset can be updated to include more files.
|
||||
> The actual delays heavily depends on your CPU capabilities. The factors should remain the same.
|
||||
> Keep in mind that the stats are generous and that Chardet accuracy vs our is measured using Chardet initial capability
|
||||
> (e.g. Supported Encoding) Challenge-them if you want.
|
||||
|
||||
## ✨ Installation
|
||||
|
||||
Using pip:
|
||||
|
||||
```sh
|
||||
pip install charset-normalizer -U
|
||||
```
|
||||
|
||||
## 🚀 Basic Usage
|
||||
|
||||
### CLI
|
||||
This package comes with a CLI.
|
||||
|
||||
```
|
||||
usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
|
||||
file [file ...]
|
||||
|
||||
The Real First Universal Charset Detector. Discover originating encoding used
|
||||
on text file. Normalize text to unicode.
|
||||
|
||||
positional arguments:
|
||||
files File(s) to be analysed
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
-v, --verbose Display complementary information about file if any.
|
||||
Stdout will contain logs about the detection process.
|
||||
-a, --with-alternative
|
||||
Output complementary possibilities if any. Top-level
|
||||
JSON WILL be a list.
|
||||
-n, --normalize Permit to normalize input file. If not set, program
|
||||
does not write anything.
|
||||
-m, --minimal Only output the charset detected to STDOUT. Disabling
|
||||
JSON output.
|
||||
-r, --replace Replace file when trying to normalize it instead of
|
||||
creating a new one.
|
||||
-f, --force Replace file without asking if you are sure, use this
|
||||
flag with caution.
|
||||
-t THRESHOLD, --threshold THRESHOLD
|
||||
Define a custom maximum amount of chaos allowed in
|
||||
decoded content. 0. <= chaos <= 1.
|
||||
--version Show version information and exit.
|
||||
```
|
||||
|
||||
```bash
|
||||
normalizer ./data/sample.1.fr.srt
|
||||
```
|
||||
|
||||
or
|
||||
|
||||
```bash
|
||||
python -m charset_normalizer ./data/sample.1.fr.srt
|
||||
```
|
||||
|
||||
🎉 Since version 1.4.0 the CLI produce easily usable stdout result in JSON format.
|
||||
|
||||
```json
|
||||
{
|
||||
"path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
|
||||
"encoding": "cp1252",
|
||||
"encoding_aliases": [
|
||||
"1252",
|
||||
"windows_1252"
|
||||
],
|
||||
"alternative_encodings": [
|
||||
"cp1254",
|
||||
"cp1256",
|
||||
"cp1258",
|
||||
"iso8859_14",
|
||||
"iso8859_15",
|
||||
"iso8859_16",
|
||||
"iso8859_3",
|
||||
"iso8859_9",
|
||||
"latin_1",
|
||||
"mbcs"
|
||||
],
|
||||
"language": "French",
|
||||
"alphabets": [
|
||||
"Basic Latin",
|
||||
"Latin-1 Supplement"
|
||||
],
|
||||
"has_sig_or_bom": false,
|
||||
"chaos": 0.149,
|
||||
"coherence": 97.152,
|
||||
"unicode_path": null,
|
||||
"is_preferred": true
|
||||
}
|
||||
```
|
||||
|
||||
### Python
|
||||
*Just print out normalized text*
|
||||
```python
|
||||
from charset_normalizer import from_path
|
||||
|
||||
results = from_path('./my_subtitle.srt')
|
||||
|
||||
print(str(results.best()))
|
||||
```
|
||||
|
||||
*Upgrade your code without effort*
|
||||
```python
|
||||
from charset_normalizer import detect
|
||||
```
|
||||
|
||||
The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.
|
||||
|
||||
See the docs for advanced usage : [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)
|
||||
|
||||
## 😇 Why
|
||||
|
||||
When I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a
|
||||
reliable alternative using a completely different method. Also! I never back down on a good challenge!
|
||||
|
||||
I **don't care** about the **originating charset** encoding, because **two different tables** can
|
||||
produce **two identical rendered string.**
|
||||
What I want is to get readable text, the best I can.
|
||||
|
||||
In a way, **I'm brute forcing text decoding.** How cool is that ? 😎
|
||||
|
||||
Don't confuse package **ftfy** with charset-normalizer or chardet. ftfy goal is to repair Unicode string whereas charset-normalizer to convert raw file in unknown encoding to unicode.
|
||||
|
||||
## 🍰 How
|
||||
|
||||
- Discard all charset encoding table that could not fit the binary content.
|
||||
- Measure noise, or the mess once opened (by chunks) with a corresponding charset encoding.
|
||||
- Extract matches with the lowest mess detected.
|
||||
- Additionally, we measure coherence / probe for a language.
|
||||
|
||||
**Wait a minute**, what is noise/mess and coherence according to **YOU ?**
|
||||
|
||||
*Noise :* I opened hundred of text files, **written by humans**, with the wrong encoding table. **I observed**, then
|
||||
**I established** some ground rules about **what is obvious** when **it seems like** a mess (aka. defining noise in rendered text).
|
||||
I know that my interpretation of what is noise is probably incomplete, feel free to contribute in order to
|
||||
improve or rewrite it.
|
||||
|
||||
*Coherence :* For each language there is on earth, we have computed ranked letter appearance occurrences (the best we can). So I thought
|
||||
that intel is worth something here. So I use those records against decoded text to check if I can detect intelligent design.
|
||||
|
||||
## ⚡ Known limitations
|
||||
|
||||
- Language detection is unreliable when text contains two or more languages sharing identical letters. (eg. HTML (english tags) + Turkish content (Sharing Latin characters))
|
||||
- Every charset detector heavily depends on sufficient content. In common cases, do not bother run detection on very tiny content.
|
||||
|
||||
## ⚠️ About Python EOLs
|
||||
|
||||
**If you are running:**
|
||||
|
||||
- Python >=2.7,<3.5: Unsupported
|
||||
- Python 3.5: charset-normalizer < 2.1
|
||||
- Python 3.6: charset-normalizer < 3.1
|
||||
- Python 3.7: charset-normalizer < 4.0
|
||||
|
||||
Upgrade your Python interpreter as soon as possible.
|
||||
|
||||
## 👤 Contributing
|
||||
|
||||
Contributions, issues and feature requests are very much welcome.<br />
|
||||
Feel free to check [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.
|
||||
|
||||
## 📝 License
|
||||
|
||||
Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).<br />
|
||||
This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.
|
||||
|
||||
Characters frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/)
|
||||
|
||||
## 💼 For Enterprise
|
||||
|
||||
Professional support for charset-normalizer is available as part of the [Tidelift
|
||||
Subscription][1]. Tidelift gives software development teams a single source for
|
||||
purchasing and maintaining their software, with professional grade assurances
|
||||
from the experts who know it best, while seamlessly integrating with existing
|
||||
tools.
|
||||
|
||||
[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme
|
||||
|
||||
[](https://www.bestpractices.dev/projects/7297)
|
||||
|
||||
# Changelog
|
||||
All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
||||
|
||||
## [3.4.4](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.4) (2025-10-13)
|
||||
|
||||
### Changed
|
||||
- Bound `setuptools` to a specific constraint `setuptools>=68,<=81`.
|
||||
- Raised upper bound of mypyc for the optional pre-built extension to v1.18.2
|
||||
|
||||
### Removed
|
||||
- `setuptools-scm` as a build dependency.
|
||||
|
||||
### Misc
|
||||
- Enforced hashes in `dev-requirements.txt` and created `ci-requirements.txt` for security purposes.
|
||||
- Additional pre-built wheels for riscv64, s390x, and armv7l architectures.
|
||||
- Restore ` multiple.intoto.jsonl` in GitHub releases in addition to individual attestation file per wheel.
|
||||
|
||||
## [3.4.3](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.3) (2025-08-09)
|
||||
|
||||
### Changed
|
||||
- mypy(c) is no longer a required dependency at build time if `CHARSET_NORMALIZER_USE_MYPYC` isn't set to `1`. (#595) (#583)
|
||||
- automatically lower confidence on small bytes samples that are not Unicode in `detect` output legacy function. (#391)
|
||||
|
||||
### Added
|
||||
- Custom build backend to overcome inability to mark mypy as an optional dependency in the build phase.
|
||||
- Support for Python 3.14
|
||||
|
||||
### Fixed
|
||||
- sdist archive contained useless directories.
|
||||
- automatically fallback on valid UTF-16 or UTF-32 even if the md says it's noisy. (#633)
|
||||
|
||||
### Misc
|
||||
- SBOM are automatically published to the relevant GitHub release to comply with regulatory changes.
|
||||
Each published wheel comes with its SBOM. We choose CycloneDX as the format.
|
||||
- Prebuilt optimized wheel are no longer distributed by default for CPython 3.7 due to a change in cibuildwheel.
|
||||
|
||||
## [3.4.2](https://github.com/Ousret/charset_normalizer/compare/3.4.1...3.4.2) (2025-05-02)
|
||||
|
||||
### Fixed
|
||||
- Addressed the DeprecationWarning in our CLI regarding `argparse.FileType` by backporting the target class into the package. (#591)
|
||||
- Improved the overall reliability of the detector with CJK Ideographs. (#605) (#587)
|
||||
|
||||
### Changed
|
||||
- Optional mypyc compilation upgraded to version 1.15 for Python >= 3.8
|
||||
|
||||
## [3.4.1](https://github.com/Ousret/charset_normalizer/compare/3.4.0...3.4.1) (2024-12-24)
|
||||
|
||||
### Changed
|
||||
- Project metadata are now stored using `pyproject.toml` instead of `setup.cfg` using setuptools as the build backend.
|
||||
- Enforce annotation delayed loading for a simpler and consistent types in the project.
|
||||
- Optional mypyc compilation upgraded to version 1.14 for Python >= 3.8
|
||||
|
||||
### Added
|
||||
- pre-commit configuration.
|
||||
- noxfile.
|
||||
|
||||
### Removed
|
||||
- `build-requirements.txt` as per using `pyproject.toml` native build configuration.
|
||||
- `bin/integration.py` and `bin/serve.py` in favor of downstream integration test (see noxfile).
|
||||
- `setup.cfg` in favor of `pyproject.toml` metadata configuration.
|
||||
- Unused `utils.range_scan` function.
|
||||
|
||||
### Fixed
|
||||
- Converting content to Unicode bytes may insert `utf_8` instead of preferred `utf-8`. (#572)
|
||||
- Deprecation warning "'count' is passed as positional argument" when converting to Unicode bytes on Python 3.13+
|
||||
|
||||
## [3.4.0](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) (2024-10-08)
|
||||
|
||||
### Added
|
||||
- Argument `--no-preemptive` in the CLI to prevent the detector to search for hints.
|
||||
- Support for Python 3.13 (#512)
|
||||
|
||||
### Fixed
|
||||
- Relax the TypeError exception thrown when trying to compare a CharsetMatch with anything else than a CharsetMatch.
|
||||
- Improved the general reliability of the detector based on user feedbacks. (#520) (#509) (#498) (#407) (#537)
|
||||
- Declared charset in content (preemptive detection) not changed when converting to utf-8 bytes. (#381)
|
||||
|
||||
## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31)
|
||||
|
||||
### Fixed
|
||||
- Unintentional memory usage regression when using large payload that match several encoding (#376)
|
||||
- Regression on some detection case showcased in the documentation (#371)
|
||||
|
||||
### Added
|
||||
- Noise (md) probe that identify malformed arabic representation due to the presence of letters in isolated form (credit to my wife)
|
||||
|
||||
## [3.3.1](https://github.com/Ousret/charset_normalizer/compare/3.3.0...3.3.1) (2023-10-22)
|
||||
|
||||
### Changed
|
||||
- Optional mypyc compilation upgraded to version 1.6.1 for Python >= 3.8
|
||||
- Improved the general detection reliability based on reports from the community
|
||||
|
||||
## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30)
|
||||
|
||||
### Added
|
||||
- Allow to execute the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer`
|
||||
- Support for 9 forgotten encoding that are supported by Python but unlisted in `encoding.aliases` as they have no alias (#323)
|
||||
|
||||
### Removed
|
||||
- (internal) Redundant utils.is_ascii function and unused function is_private_use_only
|
||||
- (internal) charset_normalizer.assets is moved inside charset_normalizer.constant
|
||||
|
||||
### Changed
|
||||
- (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection
|
||||
- Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.8
|
||||
|
||||
### Fixed
|
||||
- Unable to properly sort CharsetMatch when both chaos/noise and coherence were close due to an unreachable condition in \_\_lt\_\_ (#350)
|
||||
|
||||
## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)
|
||||
|
||||
### Changed
|
||||
- Typehint for function `from_path` no longer enforce `PathLike` as its first argument
|
||||
- Minor improvement over the global detection reliability
|
||||
|
||||
### Added
|
||||
- Introduce function `is_binary` that relies on main capabilities, and optimized to detect binaries
|
||||
- Propagate `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp` that allow a deeper control over the detection (default True)
|
||||
- Explicit support for Python 3.12
|
||||
|
||||
### Fixed
|
||||
- Edge case detection failure where a file would contain 'very-long' camel cased word (Issue #289)
|
||||
|
||||
## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)
|
||||
|
||||
### Added
|
||||
- Argument `should_rename_legacy` for legacy function `detect` and disregard any new arguments without errors (PR #262)
|
||||
|
||||
### Removed
|
||||
- Support for Python 3.6 (PR #260)
|
||||
|
||||
### Changed
|
||||
- Optional speedup provided by mypy/c 1.0.1
|
||||
|
||||
## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)
|
||||
|
||||
### Fixed
|
||||
- Multi-bytes cutter/chunk generator did not always cut correctly (PR #233)
|
||||
|
||||
### Changed
|
||||
- Speedup provided by mypy/c 0.990 on Python >= 3.7
|
||||
|
||||
## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)
|
||||
|
||||
### Added
|
||||
- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results
|
||||
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
|
||||
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
|
||||
- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)
|
||||
|
||||
### Changed
|
||||
- Build with static metadata using 'build' frontend
|
||||
- Make the language detection stricter
|
||||
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
|
||||
|
||||
### Fixed
|
||||
- CLI with opt --normalize fail when using full path for files
|
||||
- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it
|
||||
- Sphinx warnings when generating the documentation
|
||||
|
||||
### Removed
|
||||
- Coherence detector no longer return 'Simple English' instead return 'English'
|
||||
- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'
|
||||
- Breaking: Method `first()` and `best()` from CharsetMatch
|
||||
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII)
|
||||
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
|
||||
- Breaking: Top-level function `normalize`
|
||||
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
|
||||
- Support for the backport `unicodedata2`
|
||||
|
||||
## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)
|
||||
|
||||
### Added
|
||||
- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results
|
||||
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
|
||||
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
|
||||
|
||||
### Changed
|
||||
- Build with static metadata using 'build' frontend
|
||||
- Make the language detection stricter
|
||||
|
||||
### Fixed
|
||||
- CLI with opt --normalize fail when using full path for files
|
||||
- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it
|
||||
|
||||
### Removed
|
||||
- Coherence detector no longer return 'Simple English' instead return 'English'
|
||||
- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'
|
||||
|
||||
## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)
|
||||
|
||||
### Added
|
||||
- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)
|
||||
|
||||
### Removed
|
||||
- Breaking: Method `first()` and `best()` from CharsetMatch
|
||||
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII)
|
||||
|
||||
### Fixed
|
||||
- Sphinx warnings when generating the documentation
|
||||
|
||||
## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)
|
||||
|
||||
### Changed
|
||||
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
|
||||
|
||||
### Removed
|
||||
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
|
||||
- Breaking: Top-level function `normalize`
|
||||
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
|
||||
- Support for the backport `unicodedata2`
|
||||
|
||||
## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)
|
||||
|
||||
### Deprecated
|
||||
- Function `normalize` scheduled for removal in 3.0
|
||||
|
||||
### Changed
|
||||
- Removed useless call to decode in fn is_unprintable (#206)
|
||||
|
||||
### Fixed
|
||||
- Third-party library (i18n xgettext) crashing not recognizing utf_8 (PEP 263) with underscore from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)
|
||||
|
||||
## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)
|
||||
|
||||
### Added
|
||||
- Output the Unicode table version when running the CLI with `--version` (PR #194)
|
||||
|
||||
### Changed
|
||||
- Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175)
|
||||
- Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183)
|
||||
|
||||
### Fixed
|
||||
- Workaround potential bug in cpython with Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space (PR #175)
|
||||
- CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)
|
||||
|
||||
### Removed
|
||||
- Support for Python 3.5 (PR #192)
|
||||
|
||||
### Deprecated
|
||||
- Use of backport unicodedata from `unicodedata2` as Python is quickly catching up, scheduled for removal in 3.0 (PR #194)
|
||||
|
||||
## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)
|
||||
|
||||
### Fixed
|
||||
- ASCII miss-detection on rare cases (PR #170)
|
||||
|
||||
## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)
|
||||
|
||||
### Added
|
||||
- Explicit support for Python 3.11 (PR #164)
|
||||
|
||||
### Changed
|
||||
- The logging behavior have been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165)
|
||||
|
||||
## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)
|
||||
|
||||
### Fixed
|
||||
- Fallback match entries might lead to UnicodeDecodeError for large bytes sequence (PR #154)
|
||||
|
||||
### Changed
|
||||
- Skipping the language-detection (CD) on ASCII (PR #155)
|
||||
|
||||
## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)
|
||||
|
||||
### Changed
|
||||
- Moderating the logging impact (since 2.0.8) for specific environments (PR #147)
|
||||
|
||||
### Fixed
|
||||
- Wrong logging level applied when setting kwarg `explain` to True (PR #146)
|
||||
|
||||
## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)
|
||||
### Changed
|
||||
- Improvement over Vietnamese detection (PR #126)
|
||||
- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)
|
||||
- Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122)
|
||||
- call sum() without an intermediary list following PEP 289 recommendations from [@adbar](https://github.com/adbar) (PR #129)
|
||||
- Code style as refactored by Sourcery-AI (PR #131)
|
||||
- Minor adjustment on the MD around european words (PR #133)
|
||||
- Remove and replace SRTs from assets / tests (PR #139)
|
||||
- Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135)
|
||||
- Setting kwarg `explain` to True will add provisionally (bounded to function lifespan) a specific stream handler (PR #135)
|
||||
|
||||
### Fixed
|
||||
- Fix large (misleading) sequence giving UnicodeDecodeError (PR #137)
|
||||
- Avoid using too insignificant chunk (PR #137)
|
||||
|
||||
### Added
|
||||
- Add and expose function `set_logging_handler` to configure a specific StreamHandler from [@nmaynes](https://github.com/nmaynes) (PR #135)
|
||||
- Add `CHANGELOG.md` entries, format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)
## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)

### Added

- Add support for Kazakh (Cyrillic) language detection (PR #109)

### Changed

- Further improve inferring the language from a given single-byte code page (PR #112)
- Vainly trying to leverage PEP 263 when PEP 3120 is not supported (PR #116)
- Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113)
- Various detection improvements (MD+CD) (PR #117)

### Removed

- Remove redundant logging entry about detected language(s) (PR #115)

### Fixed

- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)

## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)

### Fixed

- Unforeseen regression with the loss of backward compatibility with some older minor releases of Python 3.5.x (PR #100)
- Fix CLI crash when using --minimal output in certain cases (PR #103)

### Changed

- Minor improvement to the detection efficiency (less than 1%) (PR #106 #101)

## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)

### Changed

- The project now complies with flake8, mypy, isort and black to ensure a better overall quality (PR #81)
- The BC support with v1.x was improved; the old staticmethods are restored (PR #82)
- The Unicode detection is slightly improved (PR #93)
- Add syntactic sugar `__bool__` for the results `CharsetMatches` list-container (PR #91)
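
As an illustration of the `__bool__` sugar (a sketch; `from_bytes` returns the `CharsetMatches` container and `best()` picks the most probable match):

```python
from charset_normalizer import from_bytes

results = from_bytes("Привет".encode("utf_8"))
if results:  # truthy when at least one match was found
    print(results.best())
```
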
### Removed

- The project no longer raises a warning on tiny content given for detection; it is simply logged as a warning instead (PR #92)

### Fixed

- In some rare cases, the chunks extractor could cut in the middle of a multi-byte character and could mislead the mess detection (PR #95)
- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)
- The MANIFEST.in was not exhaustive (PR #78)

## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)

### Fixed

- The CLI no longer raises an unexpected exception when no encoding has been found (PR #70)
- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)
- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)
- Submatch factoring could be wrong in rare edge cases (PR #72)
- Multiple files given to the CLI were ignored (after the first path) when publishing results to STDOUT (PR #72)
- Fix line endings from CRLF to LF for certain project files (PR #67)

### Changed

- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)
- Allow fallback on the specified encoding, if any (PR #71)

## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)

### Changed

- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63)
- According to the community's wishes, the detection will fall back on ASCII or UTF-8 as a last resort. (PR #64)

## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)

### Fixed

- Empty/too-small JSON payload mis-detection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59)

### Changed

- Don't inject unicodedata2 into sys.modules, from [@akx](https://github.com/akx) (PR #57)

## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)

### Fixed

- Make it work where there isn't a filesystem available, dropping the frequencies.json asset. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)
- Using explain=False permanently disabled the verbose output in the current runtime (PR #47)
- One log entry (language target preemptive) was not shown in logs when using explain=True (PR #47)
- Fix undesired exception (ValueError) on getitem of a CharsetMatches instance (PR #52)

### Changed

- The default argument values of the public function normalize were not aligned with from_bytes (PR #53)

### Added

- You may now use charset aliases in the cp_isolation and cp_exclusion arguments (PR #47)
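
For example, common aliases are now accepted where previously only canonical codec names worked (a sketch; the accepted alias set follows Python's own codec registry, where 'latin1' and 'u8' alias 'iso8859-1' and 'utf_8'):

```python
from charset_normalizer import from_bytes

# Restrict the probe to two encodings, named by their registry aliases.
results = from_bytes(b"caf\xe9 cr\xe8me", cp_isolation=["latin1", "u8"])
```
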
## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)

### Changed

- 4 to 5 times faster than the previous 1.4.0 release. At least 2x faster than Chardet.
- Emphasis has been put on UTF-8 detection, which should perform nearly instantaneously.
- The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible.
- The detection mechanism has been slightly improved; Turkish content is now detected correctly (most of the time).
- The program has been rewritten to improve readability and maintainability (now using static typing).
- utf_7 detection has been reinstated.
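
A minimal sketch of that Chardet-style compatibility (hedged; `detect` is the legacy helper the package exports, the sample bytes are arbitrary):

```python
from charset_normalizer import detect

# Drop-in replacement for chardet.detect(); returns the familiar
# {'encoding': ..., 'language': ..., 'confidence': ...} dict shape.
report = detect("Καλημέρα κόσμε".encode("utf_8"))
print(report["encoding"])
```
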
### Removed

- This package no longer requires anything when used with Python 3.5 (dropped cached_property).
- Removed support for these languages: Catalan, Esperanto, Kazakh, Basque, Volapük, Azeri, Galician, Nynorsk, Macedonian, and Serbo-Croatian.
- The exception hook on UnicodeDecodeError has been removed.

### Deprecated

- The methods coherence_non_latin, w_counter and chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0

### Fixed

- The CLI output used the relative path of the file(s) where it should be absolute.

## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)

### Fixed

- Logger configuration/usage no longer conflicts with others (PR #44)

## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)

### Removed

- Using standard logging instead of the loguru package.
- Dropping the nose test framework in favor of the maintained pytest.
- Chose not to use the dragonmapper package to help with gibberish Chinese/CJK text.
- Require cached_property only for Python 3.5 due to a constraint; dropping it for every other interpreter version.
- Stop support for UTF-7 that does not contain a SIG.
- Dropping PrettyTable, replaced with pure JSON output in the CLI.

### Fixed

- The BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present, due to the sub-match factoring process.
- Not searching properly for the BOM when trying the utf32/16 parent codec.

### Changed

- Improving the package's final size by compressing frequencies.json.
- Huge improvement on the largest payloads.

### Added

- The CLI now produces JSON-consumable output.
- Return ASCII if the given sequences fit, given reasonable confidence.

## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)

### Fixed

- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40)

## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)

### Fixed

- An empty payload given for detection could cause an exception when trying to access the `alphabets` property. (PR #39)

## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)

### Fixed

- The legacy detect function should return UTF-8-SIG if a sig is present in the payload. (PR #38)

## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)

### Changed

- Amend the previous release to allow prettytable 2.0 (PR #35)

## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)

### Fixed

- Fix error while using the package with a Python pre-release interpreter (PR #33)

### Changed

- Dependencies refactoring, constraints revised.

### Added

- Add Python 3.9 and 3.10 to the supported interpreters

MIT License

Copyright (c) 2025 TAHRI Ahmed R.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@ -1,52 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for charset-normalizer
</title>
</head>
<body>
<h1>
Links for charset-normalizer
</h1>
<a href="/charset-normalizer/charset_normalizer-3.4.4-py3-none-any.whl#sha256=7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f" data-requires-python=">=3.7" data-dist-info-metadata="sha256=8d5b94141b62f1d6afd7d60bbd68acb138a155d176a33518e0a28cc3b8dd9014">
charset_normalizer-3.4.4-py3-none-any.whl
</a>
<br />
<a href="/charset-normalizer/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_x86_64.whl#sha256=ebf3e58c7ec8a8bed6d66a75d7fb37b55e5015b03ceae72a8e7c74495551e224" data-requires-python=">=3.7" data-dist-info-metadata="sha256=8d5b94141b62f1d6afd7d60bbd68acb138a155d176a33518e0a28cc3b8dd9014">
charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_x86_64.whl
</a>
<br />
<a href="/charset-normalizer/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl#sha256=840c25fb618a231545cbab0564a799f101b63b9901f2569faecd6b222ac72381" data-requires-python=">=3.7" data-dist-info-metadata="sha256=8d5b94141b62f1d6afd7d60bbd68acb138a155d176a33518e0a28cc3b8dd9014">
charset_normalizer-3.4.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl
</a>
<br />
<a href="/charset-normalizer/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_x86_64.whl#sha256=cc00f04ed596e9dc0da42ed17ac5e596c6ccba999ba6bd92b0e0aef2f170f2d6" data-requires-python=">=3.7" data-dist-info-metadata="sha256=8d5b94141b62f1d6afd7d60bbd68acb138a155d176a33518e0a28cc3b8dd9014">
charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_x86_64.whl
</a>
<br />
<a href="/charset-normalizer/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl#sha256=9d1bb833febdff5c8927f922386db610b49db6e0d4f4ee29601d71e7c2694313" data-requires-python=">=3.7" data-dist-info-metadata="sha256=8d5b94141b62f1d6afd7d60bbd68acb138a155d176a33518e0a28cc3b8dd9014">
charset_normalizer-3.4.4-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl
</a>
<br />
<a href="/charset-normalizer/charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_x86_64.whl#sha256=cb01158d8b88ee68f15949894ccc6712278243d95f344770fa7593fa2d94410c" data-requires-python=">=3.7" data-dist-info-metadata="sha256=8d5b94141b62f1d6afd7d60bbd68acb138a155d176a33518e0a28cc3b8dd9014">
charset_normalizer-3.4.4-cp39-cp39-musllinux_1_2_x86_64.whl
</a>
<br />
<a href="/charset-normalizer/charset_normalizer-3.4.4-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl#sha256=4fe7859a4e3e8457458e2ff592f15ccb02f3da787fcd31e0183879c3ad4692a1" data-requires-python=">=3.7" data-dist-info-metadata="sha256=8d5b94141b62f1d6afd7d60bbd68acb138a155d176a33518e0a28cc3b8dd9014">
charset_normalizer-3.4.4-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl
</a>
<br />
<a href="/charset-normalizer/charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_x86_64.whl#sha256=5cb4d72eea50c8868f5288b7f7f33ed276118325c1dfd3957089f6b519e1382a" data-requires-python=">=3.7" data-dist-info-metadata="sha256=47aaaa4790e1bdc8c54ab5bf6e35ee86e979b65a95d41e888c72639919cbb5c3">
charset_normalizer-3.4.4-cp38-cp38-musllinux_1_2_x86_64.whl
</a>
<br />
<a href="/charset-normalizer/charset_normalizer-3.4.4-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl#sha256=f155a433c2ec037d4e8df17d18922c3a0d9b3232a396690f17175d2946f0218d" data-requires-python=">=3.7" data-dist-info-metadata="sha256=47aaaa4790e1bdc8c54ab5bf6e35ee86e979b65a95d41e888c72639919cbb5c3">
charset_normalizer-3.4.4-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl
</a>
<br />
</body>
</html>
Binary file not shown.
@ -1,84 +0,0 @@
Metadata-Version: 2.4
Name: click
Version: 8.3.1
Summary: Composable command line interface toolkit
Maintainer-email: Pallets <contact@palletsprojects.com>
Requires-Python: >=3.10
Description-Content-Type: text/markdown
License-Expression: BSD-3-Clause
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Typing :: Typed
License-File: LICENSE.txt
Requires-Dist: colorama; platform_system == 'Windows'
Project-URL: Changes, https://click.palletsprojects.com/page/changes/
Project-URL: Chat, https://discord.gg/pallets
Project-URL: Documentation, https://click.palletsprojects.com/
Project-URL: Donate, https://palletsprojects.com/donate
Project-URL: Source, https://github.com/pallets/click/

<div align="center"><img src="https://raw.githubusercontent.com/pallets/click/refs/heads/stable/docs/_static/click-name.svg" alt="" height="150"></div>

# Click

Click is a Python package for creating beautiful command line interfaces
in a composable way with as little code as necessary. It's the "Command
Line Interface Creation Kit". It's highly configurable but comes with
sensible defaults out of the box.

It aims to make the process of writing command line tools quick and fun
while also preventing any frustration caused by the inability to
implement an intended CLI API.

Click in three points:

- Arbitrary nesting of commands
- Automatic help page generation
- Supports lazy loading of subcommands at runtime (see the sketch below)
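
Lazy loading is not automatic; it is done by overriding the command-resolution hooks on a `Group`. A minimal sketch under that pattern (the module-path mapping and the `mytool.build:build` target are illustrative, not part of click itself):

```python
import importlib

import click

class LazyGroup(click.Group):
    """A Group that imports a subcommand's module only when invoked."""

    def __init__(self, *args, lazy_subcommands=None, **kwargs):
        super().__init__(*args, **kwargs)
        # Maps a command name to "module.path:attribute" of a click.Command.
        self.lazy_subcommands = lazy_subcommands or {}

    def list_commands(self, ctx):
        return sorted(super().list_commands(ctx)) + sorted(self.lazy_subcommands)

    def get_command(self, ctx, name):
        if name in self.lazy_subcommands:
            module_name, attr = self.lazy_subcommands[name].split(":")
            return getattr(importlib.import_module(module_name), attr)
        return super().get_command(ctx, name)

cli = LazyGroup(lazy_subcommands={"build": "mytool.build:build"})
```
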
## A Simple Example

```python
import click

@click.command()
@click.option("--count", default=1, help="Number of greetings.")
@click.option("--name", prompt="Your name", help="The person to greet.")
def hello(count, name):
    """Simple program that greets NAME for a total of COUNT times."""
    for _ in range(count):
        click.echo(f"Hello, {name}!")

if __name__ == '__main__':
    hello()
```

```
$ python hello.py --count=3
Your name: Click
Hello, Click!
Hello, Click!
Hello, Click!
```

## Donate

The Pallets organization develops and supports Click and other popular
packages. In order to grow the community of contributors and users, and
allow the maintainers to devote more time to the projects, [please
donate today][].

[please donate today]: https://palletsprojects.com/donate

## Contributing

See our [detailed contributing documentation][contrib] for many ways to
contribute, including reporting issues, requesting features, asking or answering
questions, and making PRs.

[contrib]: https://palletsprojects.com/contributing/

@ -1,20 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for click
</title>
</head>
<body>
<h1>
Links for click
</h1>
<a href="/click/click-8.3.1-py3-none-any.whl#sha256=981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6" data-requires-python=">=3.10" data-dist-info-metadata="sha256=5d9781acc004d2085313cf128dfad20ee4b2342a413e9971251d6d6f00fda198">
click-8.3.1-py3-none-any.whl
</a>
<br />
</body>
</html>
Binary file not shown.
@ -1,211 +0,0 @@
Metadata-Version: 2.4
Name: consolekit
Version: 1.9.0
Summary: Additional utilities for click.
Keywords: click,terminal
Author-email: Dominic Davis-Foster <dominic@davis-foster.co.uk>
Requires-Python: >=3.7
Description-Content-Type: text/x-rst
Classifier: Development Status :: 5 - Production/Stable
Classifier: Environment :: Console
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Typing :: Typed
License-File: LICENSE
Requires-Dist: click>=7.1.2
Requires-Dist: colorama>=0.4.3; python_version < "3.10" and platform_system == "Windows"
Requires-Dist: deprecation-alias>=0.1.1
Requires-Dist: domdf-python-tools>=3.8.0
Requires-Dist: mistletoe>=0.7.2
Requires-Dist: typing-extensions!=3.10.0.1,>=3.10.0.0
Requires-Dist: coincidence>=0.1.0 ; extra == "all"
Requires-Dist: psutil>=5.8.0 ; extra == "all"
Requires-Dist: pytest>=6.0.0 ; extra == "all"
Requires-Dist: pytest-regressions>=2.0.2 ; extra == "all"
Requires-Dist: psutil>=5.8.0 ; extra == "terminals"
Requires-Dist: coincidence>=0.1.0 ; extra == "testing"
Requires-Dist: pytest>=6.0.0 ; extra == "testing"
Requires-Dist: pytest-regressions>=2.0.2 ; extra == "testing"
Project-URL: Documentation, https://consolekit.readthedocs.io/en/latest
Project-URL: Homepage, https://github.com/domdfcoding/consolekit
Project-URL: Issue Tracker, https://github.com/domdfcoding/consolekit/issues
Project-URL: Source Code, https://github.com/domdfcoding/consolekit
Provides-Extra: all
Provides-Extra: terminals
Provides-Extra: testing

###########
consolekit
###########

.. start short_desc

**Additional utilities for click.**

.. end short_desc

.. start shields

.. list-table::
    :stub-columns: 1
    :widths: 10 90

    * - Docs
      - |docs| |docs_check|
    * - Tests
      - |actions_linux| |actions_windows| |actions_macos| |coveralls|
    * - PyPI
      - |pypi-version| |supported-versions| |supported-implementations| |wheel|
    * - Anaconda
      - |conda-version| |conda-platform|
    * - Activity
      - |commits-latest| |commits-since| |maintained| |pypi-downloads|
    * - QA
      - |codefactor| |actions_flake8| |actions_mypy|
    * - Other
      - |license| |language| |requires|

.. |docs| image:: https://img.shields.io/readthedocs/consolekit/latest?logo=read-the-docs
    :target: https://consolekit.readthedocs.io/en/latest
    :alt: Documentation Build Status

.. |docs_check| image:: https://github.com/domdfcoding/consolekit/workflows/Docs%20Check/badge.svg
    :target: https://github.com/domdfcoding/consolekit/actions?query=workflow%3A%22Docs+Check%22
    :alt: Docs Check Status

.. |actions_linux| image:: https://github.com/domdfcoding/consolekit/workflows/Linux/badge.svg
    :target: https://github.com/domdfcoding/consolekit/actions?query=workflow%3A%22Linux%22
    :alt: Linux Test Status

.. |actions_windows| image:: https://github.com/domdfcoding/consolekit/workflows/Windows/badge.svg
    :target: https://github.com/domdfcoding/consolekit/actions?query=workflow%3A%22Windows%22
    :alt: Windows Test Status

.. |actions_macos| image:: https://github.com/domdfcoding/consolekit/workflows/macOS/badge.svg
    :target: https://github.com/domdfcoding/consolekit/actions?query=workflow%3A%22macOS%22
    :alt: macOS Test Status

.. |actions_flake8| image:: https://github.com/domdfcoding/consolekit/workflows/Flake8/badge.svg
    :target: https://github.com/domdfcoding/consolekit/actions?query=workflow%3A%22Flake8%22
    :alt: Flake8 Status

.. |actions_mypy| image:: https://github.com/domdfcoding/consolekit/workflows/mypy/badge.svg
    :target: https://github.com/domdfcoding/consolekit/actions?query=workflow%3A%22mypy%22
    :alt: mypy status

.. |requires| image:: https://dependency-dash.repo-helper.uk/github/domdfcoding/consolekit/badge.svg
    :target: https://dependency-dash.repo-helper.uk/github/domdfcoding/consolekit/
    :alt: Requirements Status

.. |coveralls| image:: https://img.shields.io/coveralls/github/domdfcoding/consolekit/master?logo=coveralls
    :target: https://coveralls.io/github/domdfcoding/consolekit?branch=master
    :alt: Coverage

.. |codefactor| image:: https://img.shields.io/codefactor/grade/github/domdfcoding/consolekit?logo=codefactor
    :target: https://www.codefactor.io/repository/github/domdfcoding/consolekit
    :alt: CodeFactor Grade

.. |pypi-version| image:: https://img.shields.io/pypi/v/consolekit
    :target: https://pypi.org/project/consolekit/
    :alt: PyPI - Package Version

.. |supported-versions| image:: https://img.shields.io/pypi/pyversions/consolekit?logo=python&logoColor=white
    :target: https://pypi.org/project/consolekit/
    :alt: PyPI - Supported Python Versions

.. |supported-implementations| image:: https://img.shields.io/pypi/implementation/consolekit
    :target: https://pypi.org/project/consolekit/
    :alt: PyPI - Supported Implementations

.. |wheel| image:: https://img.shields.io/pypi/wheel/consolekit
    :target: https://pypi.org/project/consolekit/
    :alt: PyPI - Wheel

.. |conda-version| image:: https://img.shields.io/conda/v/domdfcoding/consolekit?logo=anaconda
    :target: https://anaconda.org/domdfcoding/consolekit
    :alt: Conda - Package Version

.. |conda-platform| image:: https://img.shields.io/conda/pn/domdfcoding/consolekit?label=conda%7Cplatform
    :target: https://anaconda.org/domdfcoding/consolekit
    :alt: Conda - Platform

.. |license| image:: https://img.shields.io/github/license/domdfcoding/consolekit
    :target: https://github.com/domdfcoding/consolekit/blob/master/LICENSE
    :alt: License

.. |language| image:: https://img.shields.io/github/languages/top/domdfcoding/consolekit
    :alt: GitHub top language

.. |commits-since| image:: https://img.shields.io/github/commits-since/domdfcoding/consolekit/v1.9.0
    :target: https://github.com/domdfcoding/consolekit/pulse
    :alt: GitHub commits since tagged version

.. |commits-latest| image:: https://img.shields.io/github/last-commit/domdfcoding/consolekit
    :target: https://github.com/domdfcoding/consolekit/commit/master
    :alt: GitHub last commit

.. |maintained| image:: https://img.shields.io/maintenance/yes/2025
    :alt: Maintenance

.. |pypi-downloads| image:: https://img.shields.io/pypi/dm/consolekit
    :target: https://pypi.org/project/consolekit/
    :alt: PyPI - Downloads

.. end shields

Installation
--------------

.. start installation

``consolekit`` can be installed from PyPI or Anaconda.

To install with ``pip``:

.. code-block:: bash

    $ python -m pip install consolekit

To install with ``conda``:

* First add the required channels

.. code-block:: bash

    $ conda config --add channels https://conda.anaconda.org/conda-forge
    $ conda config --add channels https://conda.anaconda.org/domdfcoding

* Then install

.. code-block:: bash

    $ conda install consolekit

.. end installation

Additionally, for better support in terminals,
install `psutil <https://pypi.org/project/psutil/>`_ by specifying the ``terminals`` extra:

.. code-block:: bash

    $ python -m pip install consolekit[terminals]

or, if you installed ``consolekit`` through conda:

.. code-block:: bash

    $ conda install -c conda-forge psutil

@ -1,20 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for consolekit
</title>
</head>
<body>
<h1>
Links for consolekit
</h1>
<a href="/consolekit/consolekit-1.9.0-py3-none-any.whl#sha256=2997e9d18995e024c5338d5f4b20394ec5f962ccefebce1b2a8ffa3bca507cf0" data-requires-python=">=3.7" data-dist-info-metadata="sha256=43c29372fd3df281ba99e1843b2bd52d261bf65d716eeb89c618418488f09ad7">
consolekit-1.9.0-py3-none-any.whl
</a>
<br />
</body>
</html>
Binary file not shown.
@ -1,378 +0,0 @@
Metadata-Version: 2.4
Name: deprecation-alias
Version: 0.4.0
Summary: A wrapper around 'deprecation' providing support for deprecated aliases.
Project-URL: Homepage, https://github.com/domdfcoding/deprecation-alias
Project-URL: Issue Tracker, https://github.com/domdfcoding/deprecation-alias/issues
Project-URL: Source Code, https://github.com/domdfcoding/deprecation-alias
Author-email: Dominic Davis-Foster <dominic@davis-foster.co.uk>
License: Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction, and
distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by the
copyright owner that is granting the License.

"Legal Entity" shall mean the union of the acting entity and all other
entities that control, are controlled by, or are under common control with
that entity. For the purposes of this definition, "control" means (i) the
power, direct or indirect, to cause the direction or management of such
entity, whether by contract or otherwise, or (ii) ownership of
fifty percent (50%) or more of the outstanding shares, or (iii) beneficial
ownership of such entity.

"You" (or "Your") shall mean an individual or Legal Entity exercising
permissions granted by this License.

"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation source,
and configuration files.

"Object" form shall mean any form resulting from mechanical transformation
or translation of a Source form, including but not limited to compiled
object code, generated documentation, and conversions to
other media types.

"Work" shall mean the work of authorship, whether in Source or Object
form, made available under the License, as indicated by a copyright notice
that is included in or attached to the work (an example is provided in the
Appendix below).

"Derivative Works" shall mean any work, whether in Source or Object form,
that is based on (or derived from) the Work and for which the editorial
revisions, annotations, elaborations, or other modifications represent,
as a whole, an original work of authorship. For the purposes of this
License, Derivative Works shall not include works that remain separable
from, or merely link (or bind by name) to the interfaces of, the Work and
Derivative Works thereof.

"Contribution" shall mean any work of authorship, including the original
version of the Work and any modifications or additions to that Work or
Derivative Works thereof, that is intentionally submitted to Licensor for
inclusion in the Work by the copyright owner or by an individual or
Legal Entity authorized to submit on behalf of the copyright owner.
For the purposes of this definition, "submitted" means any form of
electronic, verbal, or written communication sent to the Licensor or its
representatives, including but not limited to communication on electronic
mailing lists, source code control systems, and issue tracking systems
that are managed by, or on behalf of, the Licensor for the purpose of
discussing and improving the Work, but excluding communication that is
conspicuously marked or otherwise designated in writing by the copyright
owner as "Not a Contribution."

"Contributor" shall mean Licensor and any individual or Legal Entity on
behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.

2. Grant of Copyright License.

Subject to the terms and conditions of this License, each Contributor
hereby grants to You a perpetual, worldwide, non-exclusive, no-charge,
royalty-free, irrevocable copyright license to reproduce, prepare
Derivative Works of, publicly display, publicly perform, sublicense,
and distribute the Work and such Derivative Works in
Source or Object form.

3. Grant of Patent License.

Subject to the terms and conditions of this License, each Contributor
hereby grants to You a perpetual, worldwide, non-exclusive, no-charge,
royalty-free, irrevocable (except as stated in this section) patent
license to make, have made, use, offer to sell, sell, import, and
otherwise transfer the Work, where such license applies only to those
patent claims licensable by such Contributor that are necessarily
infringed by their Contribution(s) alone or by combination of their
Contribution(s) with the Work to which such Contribution(s) was submitted.
If You institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work or a
Contribution incorporated within the Work constitutes direct or
contributory patent infringement, then any patent licenses granted to
You under this License for that Work shall terminate as of the date such
litigation is filed.

4. Redistribution.

You may reproduce and distribute copies of the Work or Derivative Works
thereof in any medium, with or without modifications, and in Source or
Object form, provided that You meet the following conditions:

1. You must give any other recipients of the Work or Derivative Works a
copy of this License; and

2. You must cause any modified files to carry prominent notices stating
that You changed the files; and

3. You must retain, in the Source form of any Derivative Works that You
distribute, all copyright, patent, trademark, and attribution notices from
the Source form of the Work, excluding those notices that do not pertain
to any part of the Derivative Works; and

4. If the Work includes a "NOTICE" text file as part of its distribution,
then any Derivative Works that You distribute must include a readable copy
of the attribution notices contained within such NOTICE file, excluding
those notices that do not pertain to any part of the Derivative Works,
in at least one of the following places: within a NOTICE text file
distributed as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or, within a
display generated by the Derivative Works, if and wherever such
third-party notices normally appear. The contents of the NOTICE file are
for informational purposes only and do not modify the License.
You may add Your own attribution notices within Derivative Works that You
distribute, alongside or as an addendum to the NOTICE text from the Work,
provided that such additional attribution notices cannot be construed
as modifying the License.

You may add Your own copyright statement to Your modifications and may
provide additional or different license terms and conditions for use,
reproduction, or distribution of Your modifications, or for any such
Derivative Works as a whole, provided Your use, reproduction, and
distribution of the Work otherwise complies with the conditions
stated in this License.

5. Submission of Contributions.

Unless You explicitly state otherwise, any Contribution intentionally
submitted for inclusion in the Work by You to the Licensor shall be under
the terms and conditions of this License, without any additional
terms or conditions. Notwithstanding the above, nothing herein shall
supersede or modify the terms of any separate license agreement you may
have executed with Licensor regarding such Contributions.

6. Trademarks.

This License does not grant permission to use the trade names, trademarks,
service marks, or product names of the Licensor, except as required for
reasonable and customary use in describing the origin of the Work and
reproducing the content of the NOTICE file.

7. Disclaimer of Warranty.

Unless required by applicable law or agreed to in writing, Licensor
provides the Work (and each Contributor provides its Contributions)
on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
either express or implied, including, without limitation, any warranties
or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS
FOR A PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any risks
associated with Your exercise of permissions under this License.

8. Limitation of Liability.

In no event and under no legal theory, whether in tort
(including negligence), contract, or otherwise, unless required by
applicable law (such as deliberate and grossly negligent acts) or agreed
to in writing, shall any Contributor be liable to You for damages,
including any direct, indirect, special, incidental, or consequential
damages of any character arising as a result of this License or out of
the use or inability to use the Work (including but not limited to damages
for loss of goodwill, work stoppage, computer failure or malfunction,
or any and all other commercial damages or losses), even if such
Contributor has been advised of the possibility of such damages.

9. Accepting Warranty or Additional Liability.

While redistributing the Work or Derivative Works thereof, You may choose
to offer, and charge a fee for, acceptance of support, warranty,
indemnity, or other liability obligations and/or rights consistent with
this License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf of any
other Contributor, and only if You agree to indemnify, defend, and hold
each Contributor harmless for any liability incurred by, or claims
asserted against, such Contributor by reason of your accepting any such
warranty or additional liability.

END OF TERMS AND CONDITIONS

APPENDIX: How to apply the Apache License to your work

To apply the Apache License to your work, attach the following boilerplate
notice, with the fields enclosed by brackets "[]" replaced with your own
identifying information. (Don't include the brackets!) The text should be
enclosed in the appropriate comment syntax for the file format. We also
recommend that a file or class name and description of purpose be included
on the same "printed page" as the copyright notice for easier
identification within third-party archives.

Copyright 2020 Dominic Davis-Foster

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
or implied. See the License for the specific language governing
permissions and limitations under the License.
License-File: LICENSE
Keywords: deprecation
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: Apache Software License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Typing :: Typed
Requires-Python: >=3.6.1
Requires-Dist: deprecation>=2.1.0
Requires-Dist: packaging>=20.4
Description-Content-Type: text/x-rst

##################
deprecation-alias
##################

.. start short_desc

**A wrapper around 'deprecation' providing support for deprecated aliases.**

.. end short_desc

.. start shields

.. list-table::
    :stub-columns: 1
    :widths: 10 90

    * - Tests
      - |actions_linux| |actions_windows| |actions_macos| |coveralls|
    * - PyPI
      - |pypi-version| |supported-versions| |supported-implementations| |wheel|
    * - Anaconda
      - |conda-version| |conda-platform|
    * - Activity
      - |commits-latest| |commits-since| |maintained| |pypi-downloads|
    * - QA
      - |codefactor| |actions_flake8| |actions_mypy|
    * - Other
      - |license| |language| |requires|

.. |actions_linux| image:: https://github.com/domdfcoding/deprecation-alias/workflows/Linux/badge.svg
    :target: https://github.com/domdfcoding/deprecation-alias/actions?query=workflow%3A%22Linux%22
    :alt: Linux Test Status

.. |actions_windows| image:: https://github.com/domdfcoding/deprecation-alias/workflows/Windows/badge.svg
    :target: https://github.com/domdfcoding/deprecation-alias/actions?query=workflow%3A%22Windows%22
    :alt: Windows Test Status

.. |actions_macos| image:: https://github.com/domdfcoding/deprecation-alias/workflows/macOS/badge.svg
    :target: https://github.com/domdfcoding/deprecation-alias/actions?query=workflow%3A%22macOS%22
    :alt: macOS Test Status

.. |actions_flake8| image:: https://github.com/domdfcoding/deprecation-alias/workflows/Flake8/badge.svg
    :target: https://github.com/domdfcoding/deprecation-alias/actions?query=workflow%3A%22Flake8%22
    :alt: Flake8 Status

.. |actions_mypy| image:: https://github.com/domdfcoding/deprecation-alias/workflows/mypy/badge.svg
    :target: https://github.com/domdfcoding/deprecation-alias/actions?query=workflow%3A%22mypy%22
    :alt: mypy status

.. |requires| image:: https://dependency-dash.repo-helper.uk/github/domdfcoding/deprecation-alias/badge.svg
    :target: https://dependency-dash.repo-helper.uk/github/domdfcoding/deprecation-alias/
    :alt: Requirements Status

.. |coveralls| image:: https://img.shields.io/coveralls/github/domdfcoding/deprecation-alias/master?logo=coveralls
    :target: https://coveralls.io/github/domdfcoding/deprecation-alias?branch=master
    :alt: Coverage

.. |codefactor| image:: https://img.shields.io/codefactor/grade/github/domdfcoding/deprecation-alias?logo=codefactor
    :target: https://www.codefactor.io/repository/github/domdfcoding/deprecation-alias
    :alt: CodeFactor Grade

.. |pypi-version| image:: https://img.shields.io/pypi/v/deprecation-alias
    :target: https://pypi.org/project/deprecation-alias/
    :alt: PyPI - Package Version

.. |supported-versions| image:: https://img.shields.io/pypi/pyversions/deprecation-alias?logo=python&logoColor=white
    :target: https://pypi.org/project/deprecation-alias/
    :alt: PyPI - Supported Python Versions

.. |supported-implementations| image:: https://img.shields.io/pypi/implementation/deprecation-alias
    :target: https://pypi.org/project/deprecation-alias/
    :alt: PyPI - Supported Implementations

.. |wheel| image:: https://img.shields.io/pypi/wheel/deprecation-alias
    :target: https://pypi.org/project/deprecation-alias/
    :alt: PyPI - Wheel

.. |conda-version| image:: https://img.shields.io/conda/v/domdfcoding/deprecation-alias?logo=anaconda
    :target: https://anaconda.org/domdfcoding/deprecation-alias
    :alt: Conda - Package Version

.. |conda-platform| image:: https://img.shields.io/conda/pn/domdfcoding/deprecation-alias?label=conda%7Cplatform
    :target: https://anaconda.org/domdfcoding/deprecation-alias
    :alt: Conda - Platform

.. |license| image:: https://img.shields.io/github/license/domdfcoding/deprecation-alias
    :target: https://github.com/domdfcoding/deprecation-alias/blob/master/LICENSE
    :alt: License

.. |language| image:: https://img.shields.io/github/languages/top/domdfcoding/deprecation-alias
    :alt: GitHub top language

.. |commits-since| image:: https://img.shields.io/github/commits-since/domdfcoding/deprecation-alias/v0.4.0
    :target: https://github.com/domdfcoding/deprecation-alias/pulse
    :alt: GitHub commits since tagged version

.. |commits-latest| image:: https://img.shields.io/github/last-commit/domdfcoding/deprecation-alias
    :target: https://github.com/domdfcoding/deprecation-alias/commit/master
    :alt: GitHub last commit

.. |maintained| image:: https://img.shields.io/maintenance/yes/2025
    :alt: Maintenance

.. |pypi-downloads| image:: https://img.shields.io/pypi/dm/deprecation-alias
    :target: https://pypi.org/project/deprecation-alias/
    :alt: PyPI - Downloads

.. end shields

Installation
--------------

.. start installation

``deprecation-alias`` can be installed from PyPI or Anaconda.

To install with ``pip``:

.. code-block:: bash

    $ python -m pip install deprecation-alias

To install with ``conda``:

* First add the required channels

.. code-block:: bash

    $ conda config --add channels https://conda.anaconda.org/conda-forge
    $ conda config --add channels https://conda.anaconda.org/domdfcoding

* Then install

.. code-block:: bash

    $ conda install deprecation-alias

.. end installation

@ -1,20 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for deprecation-alias
</title>
</head>
<body>
<h1>
Links for deprecation-alias
</h1>
<a href="/deprecation-alias/deprecation_alias-0.4.0-py3-none-any.whl#sha256=a2d3cb08705d81bcc845ebbeff8e981ca7e5ef6c51478f0c771c38a54d7d7811" data-requires-python=">=3.6.1" data-dist-info-metadata="sha256=2309e22f77ba5cc3b021d68ff4836d53eb1f2c40d4b46d8414b3f49a7b5a7c83">
deprecation_alias-0.4.0-py3-none-any.whl
</a>
<br />
</body>
</html>
Binary file not shown.
@ -1,114 +0,0 @@
Metadata-Version: 2.1
Name: deprecation
Version: 2.1.0
Summary: A library to handle automated deprecations
Home-page: http://deprecation.readthedocs.io/
Author: Brian Curtin
Author-email: brian@python.org
Maintainer: Brian Curtin
Maintainer-email: brian@python.org
License: Apache 2
Project-URL: Documentation, http://deprecation.readthedocs.io/en/latest/
Project-URL: Source, https://github.com/briancurtin/deprecation
Project-URL: Bug Tracker, https://github.com/briancurtin/deprecation/issues
Keywords: deprecation
Platform: UNKNOWN
Classifier: Development Status :: 5 - Production/Stable
Classifier: License :: OSI Approved :: Apache Software License
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 2
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.5
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Requires-Dist: packaging

deprecation
===========

.. image:: https://readthedocs.org/projects/deprecation/badge/?version=latest
    :target: http://deprecation.readthedocs.io/en/latest/
    :alt: Documentation Status

.. image:: https://travis-ci.org/briancurtin/deprecation.svg?branch=master
    :target: https://travis-ci.org/briancurtin/deprecation

.. image:: https://codecov.io/gh/briancurtin/deprecation/branch/master/graph/badge.svg
    :target: https://codecov.io/gh/briancurtin/deprecation

The ``deprecation`` library provides a ``deprecated`` decorator and a
``fail_if_not_removed`` decorator for your tests. Together, the two
enable the automation of several things:

1. The docstring of a deprecated method gets the deprecation details
   appended to the end of it. If you generate your API docs directly
   from your source, you don't need to worry about writing your own
   notification. You also don't need to worry about forgetting to
   write it. It's done for you.
2. Rather than having code live on forever because you only deprecated
   it but never actually moved on from it, you can have your tests
   tell you when it's time to remove the code. The ``@deprecated``
   decorator can be told when it's time to entirely remove the code,
   which causes ``@fail_if_not_removed`` to raise an ``AssertionError``,
   causing either your unittest or py.test tests to fail.

See http://deprecation.readthedocs.io/ for the full documentation.

Installation
============

::

    pip install deprecation

Usage
=====

::

    import deprecation

    @deprecation.deprecated(deprecated_in="1.0", removed_in="2.0",
                            current_version=__version__,
                            details="Use the bar function instead")
    def foo():
        """Do some stuff"""
        return 1
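
The companion ``@fail_if_not_removed`` decorator goes on the tests that cover deprecated code; once ``removed_in`` is reached, those tests fail. A sketch following the ``foo`` example above::

    import unittest

    import deprecation

    class TestFoo(unittest.TestCase):

        # Raises an AssertionError once foo() is past its removed_in
        # version, reminding you to delete the dead code.
        @deprecation.fail_if_not_removed
        def test_foo(self):
            self.assertEqual(foo(), 1)
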
...but doesn't Python ignore ``DeprecationWarning``?
====================================================

Yes, by default since 2.7—and for good reason [#]_ —and this works fine
with that.

1. It often makes sense for you to run your tests with a ``-W`` flag or
   the ``PYTHONWARNINGS`` environment variable so you catch warnings
   in development and handle them appropriately (see the sketch after
   this list). The warnings raised by this library show up there, as
   they're subclasses of the built-in ``DeprecationWarning``. See the
   `Command Line
   <https://docs.python.org/2/using/cmdline.html#cmdoption-W>`_
   and `Environment Variable
   <https://docs.python.org/2/using/cmdline.html#envvar-PYTHONWARNINGS>`_
   documentation for more details.
2. Even if you don't enable those things, the behavior of this library
   remains the same. The docstrings will still be updated and the tests
   will still fail when they need to. You'll get the benefits regardless
   of how Python treats ``DeprecationWarning``.
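
The same effect can be had programmatically at the top of a test suite (a sketch; equivalent to running the interpreter with ``-W error::DeprecationWarning``)::

    import warnings

    # Promote DeprecationWarning (and its subclasses) to errors so
    # deprecations surface during development and CI runs.
    warnings.simplefilter("error", DeprecationWarning)
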
----

.. [#] Exposing application users to ``DeprecationWarning``\s that are
   emitted by lower-level code needlessly involves end-users in
   "how things are done." It often leads to users raising issues
   about warnings they're presented, which on one hand is done
   rightfully so, as it's been presented to them as some sort of
   issue to resolve. However, at the same time, the warning could
   be well known and planned for. From either side, loud
   ``DeprecationWarning``\s can be seen as noise that isn't
   necessary outside of development.

@ -1,20 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for deprecation
</title>
</head>
<body>
<h1>
Links for deprecation
</h1>
<a href="/deprecation/deprecation-2.1.0-py2.py3-none-any.whl#sha256=a10811591210e1fb0e768a8c25517cabeabcba6f0bf96564f8ff45189f90b14a" data-dist-info-metadata="sha256=02696153ceadfe541087ce7c8d2319319a6860cea07e79cb48a29ee4380653cf">
deprecation-2.1.0-py2.py3-none-any.whl
</a>
<br />
</body>
</html>
Binary file not shown.
@ -1,203 +0,0 @@
Metadata-Version: 2.4
Name: dist-meta
Version: 0.9.0
Summary: Parse and create Python distribution metadata.
Project-URL: Homepage, https://github.com/repo-helper/dist-meta
Project-URL: Issue Tracker, https://github.com/repo-helper/dist-meta/issues
Project-URL: Source Code, https://github.com/repo-helper/dist-meta
Project-URL: Documentation, https://dist-meta.readthedocs.io/en/latest
Author-email: Dominic Davis-Foster <dominic@davis-foster.co.uk>
License: Copyright (c) 2021 Dominic Davis-Foster

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
OR OTHER DEALINGS IN THE SOFTWARE.
License-File: LICENSE
Keywords: dist-info,metadata,packaging,pypi
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: System :: Archiving :: Packaging
Classifier: Typing :: Typed
Requires-Python: >=3.6.1
Requires-Dist: domdf-python-tools>=3.1.0
Requires-Dist: handy-archives>=0.1.0
Requires-Dist: packaging>=20.9
Description-Content-Type: text/x-rst

==========
dist-meta
==========

.. start short_desc

**Parse and create Python distribution metadata.**

.. end short_desc

.. start shields

.. list-table::
    :stub-columns: 1
    :widths: 10 90

    * - Docs
      - |docs| |docs_check|
    * - Tests
      - |actions_linux| |actions_windows| |actions_macos| |coveralls|
    * - PyPI
      - |pypi-version| |supported-versions| |supported-implementations| |wheel|
    * - Anaconda
      - |conda-version| |conda-platform|
    * - Activity
      - |commits-latest| |commits-since| |maintained| |pypi-downloads|
    * - QA
      - |codefactor| |actions_flake8| |actions_mypy|
    * - Other
      - |license| |language| |requires|

.. |docs| image:: https://img.shields.io/readthedocs/dist-meta/latest?logo=read-the-docs
    :target: https://dist-meta.readthedocs.io/en/latest
    :alt: Documentation Build Status

.. |docs_check| image:: https://github.com/repo-helper/dist-meta/workflows/Docs%20Check/badge.svg
    :target: https://github.com/repo-helper/dist-meta/actions?query=workflow%3A%22Docs+Check%22
    :alt: Docs Check Status

.. |actions_linux| image:: https://github.com/repo-helper/dist-meta/workflows/Linux/badge.svg
    :target: https://github.com/repo-helper/dist-meta/actions?query=workflow%3A%22Linux%22
    :alt: Linux Test Status

.. |actions_windows| image:: https://github.com/repo-helper/dist-meta/workflows/Windows/badge.svg
    :target: https://github.com/repo-helper/dist-meta/actions?query=workflow%3A%22Windows%22
    :alt: Windows Test Status

.. |actions_macos| image:: https://github.com/repo-helper/dist-meta/workflows/macOS/badge.svg
    :target: https://github.com/repo-helper/dist-meta/actions?query=workflow%3A%22macOS%22
    :alt: macOS Test Status

.. |actions_flake8| image:: https://github.com/repo-helper/dist-meta/workflows/Flake8/badge.svg
    :target: https://github.com/repo-helper/dist-meta/actions?query=workflow%3A%22Flake8%22
    :alt: Flake8 Status

.. |actions_mypy| image:: https://github.com/repo-helper/dist-meta/workflows/mypy/badge.svg
    :target: https://github.com/repo-helper/dist-meta/actions?query=workflow%3A%22mypy%22
    :alt: mypy status

.. |requires| image:: https://dependency-dash.repo-helper.uk/github/repo-helper/dist-meta/badge.svg
    :target: https://dependency-dash.repo-helper.uk/github/repo-helper/dist-meta/
    :alt: Requirements Status

.. |coveralls| image:: https://img.shields.io/coveralls/github/repo-helper/dist-meta/master?logo=coveralls
    :target: https://coveralls.io/github/repo-helper/dist-meta?branch=master
    :alt: Coverage

.. |codefactor| image:: https://img.shields.io/codefactor/grade/github/repo-helper/dist-meta?logo=codefactor
    :target: https://www.codefactor.io/repository/github/repo-helper/dist-meta
    :alt: CodeFactor Grade

.. |pypi-version| image:: https://img.shields.io/pypi/v/dist-meta
    :target: https://pypi.org/project/dist-meta/
    :alt: PyPI - Package Version

.. |supported-versions| image:: https://img.shields.io/pypi/pyversions/dist-meta?logo=python&logoColor=white
    :target: https://pypi.org/project/dist-meta/
    :alt: PyPI - Supported Python Versions

.. |supported-implementations| image:: https://img.shields.io/pypi/implementation/dist-meta
    :target: https://pypi.org/project/dist-meta/
    :alt: PyPI - Supported Implementations

.. |wheel| image:: https://img.shields.io/pypi/wheel/dist-meta
    :target: https://pypi.org/project/dist-meta/
    :alt: PyPI - Wheel

.. |conda-version| image:: https://img.shields.io/conda/v/domdfcoding/dist-meta?logo=anaconda
    :target: https://anaconda.org/domdfcoding/dist-meta
    :alt: Conda - Package Version

.. |conda-platform| image:: https://img.shields.io/conda/pn/domdfcoding/dist-meta?label=conda%7Cplatform
|
||||
:target: https://anaconda.org/domdfcoding/dist-meta
|
||||
:alt: Conda - Platform
|
||||
|
||||
.. |license| image:: https://img.shields.io/github/license/repo-helper/dist-meta
|
||||
:target: https://github.com/repo-helper/dist-meta/blob/master/LICENSE
|
||||
:alt: License
|
||||
|
||||
.. |language| image:: https://img.shields.io/github/languages/top/repo-helper/dist-meta
|
||||
:alt: GitHub top language
|
||||
|
||||
.. |commits-since| image:: https://img.shields.io/github/commits-since/repo-helper/dist-meta/v0.9.0
|
||||
:target: https://github.com/repo-helper/dist-meta/pulse
|
||||
:alt: GitHub commits since tagged version
|
||||
|
||||
.. |commits-latest| image:: https://img.shields.io/github/last-commit/repo-helper/dist-meta
|
||||
:target: https://github.com/repo-helper/dist-meta/commit/master
|
||||
:alt: GitHub last commit
|
||||
|
||||
.. |maintained| image:: https://img.shields.io/maintenance/yes/2025
|
||||
:alt: Maintenance
|
||||
|
||||
.. |pypi-downloads| image:: https://img.shields.io/pypi/dm/dist-meta
|
||||
:target: https://pypi.org/project/dist-meta/
|
||||
:alt: PyPI - Downloads
|
||||
|
||||
.. end shields
|
||||
|
||||
Installation
|
||||
--------------
|
||||
|
||||
.. start installation
|
||||
|
||||
``dist-meta`` can be installed from PyPI or Anaconda.
|
||||
|
||||
To install with ``pip``:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ python -m pip install dist-meta
|
||||
|
||||
To install with ``conda``:
|
||||
|
||||
* First add the required channels
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ conda config --add channels https://conda.anaconda.org/conda-forge
|
||||
$ conda config --add channels https://conda.anaconda.org/domdfcoding
|
||||
|
||||
* Then install
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ conda install dist-meta
|
||||
|
||||
.. end installation
|
||||
|
|
@@ -1,20 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for dist-meta
</title>
</head>
<body>
<h1>
Links for dist-meta
</h1>
<a href="/dist-meta/dist_meta-0.9.0-py3-none-any.whl#sha256=1d38a7f5e83ab2fc5e1fb9af92d995ec27718377d000c7318367db85a2ba07db" data-requires-python=">=3.6.1" data-dist-info-metadata="sha256=07d929464ad91284dfa37ce8370376146451918ba6a0af118181cb382a9dcd2a">
dist_meta-0.9.0-py3-none-any.whl
</a>
<br />
</body>
</html>
Binary file not shown.
@@ -1,183 +0,0 @@
Metadata-Version: 2.4
Name: dom_toml
Version: 2.1.0
Summary: Dom's tools for Tom's Obvious, Minimal Language.
Keywords: configuration,serialize,toml
Author-email: Dominic Davis-Foster <dominic@davis-foster.co.uk>
Requires-Python: >=3.7
Description-Content-Type: text/x-rst
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Typing :: Typed
License-File: LICENSE
Requires-Dist: domdf-python-tools>=2.8.0
Requires-Dist: tomli>=1.2.3; python_version < "3.11"
Project-URL: Documentation, https://dom-toml.readthedocs.io/en/latest
Project-URL: Homepage, https://github.com/domdfcoding/dom_toml
Project-URL: Issue Tracker, https://github.com/domdfcoding/dom_toml/issues
Project-URL: Source Code, https://github.com/domdfcoding/dom_toml

#########
dom_toml
#########

.. start short_desc

**Dom's tools for Tom's Obvious, Minimal Language.**

.. end short_desc


.. start shields

.. list-table::
    :stub-columns: 1
    :widths: 10 90

    * - Docs
      - |docs| |docs_check|
    * - Tests
      - |actions_linux| |actions_windows| |actions_macos| |coveralls|
    * - PyPI
      - |pypi-version| |supported-versions| |supported-implementations| |wheel|
    * - Anaconda
      - |conda-version| |conda-platform|
    * - Activity
      - |commits-latest| |commits-since| |maintained| |pypi-downloads|
    * - QA
      - |codefactor| |actions_flake8| |actions_mypy|
    * - Other
      - |license| |language| |requires|

.. |docs| image:: https://img.shields.io/readthedocs/dom-toml/latest?logo=read-the-docs
    :target: https://dom-toml.readthedocs.io/en/latest
    :alt: Documentation Build Status

.. |docs_check| image:: https://github.com/domdfcoding/dom_toml/workflows/Docs%20Check/badge.svg
    :target: https://github.com/domdfcoding/dom_toml/actions?query=workflow%3A%22Docs+Check%22
    :alt: Docs Check Status

.. |actions_linux| image:: https://github.com/domdfcoding/dom_toml/workflows/Linux/badge.svg
    :target: https://github.com/domdfcoding/dom_toml/actions?query=workflow%3A%22Linux%22
    :alt: Linux Test Status

.. |actions_windows| image:: https://github.com/domdfcoding/dom_toml/workflows/Windows/badge.svg
    :target: https://github.com/domdfcoding/dom_toml/actions?query=workflow%3A%22Windows%22
    :alt: Windows Test Status

.. |actions_macos| image:: https://github.com/domdfcoding/dom_toml/workflows/macOS/badge.svg
    :target: https://github.com/domdfcoding/dom_toml/actions?query=workflow%3A%22macOS%22
    :alt: macOS Test Status

.. |actions_flake8| image:: https://github.com/domdfcoding/dom_toml/workflows/Flake8/badge.svg
    :target: https://github.com/domdfcoding/dom_toml/actions?query=workflow%3A%22Flake8%22
    :alt: Flake8 Status

.. |actions_mypy| image:: https://github.com/domdfcoding/dom_toml/workflows/mypy/badge.svg
    :target: https://github.com/domdfcoding/dom_toml/actions?query=workflow%3A%22mypy%22
    :alt: mypy status

.. |requires| image:: https://dependency-dash.repo-helper.uk/github/domdfcoding/dom_toml/badge.svg
    :target: https://dependency-dash.repo-helper.uk/github/domdfcoding/dom_toml/
    :alt: Requirements Status

.. |coveralls| image:: https://img.shields.io/coveralls/github/domdfcoding/dom_toml/master?logo=coveralls
    :target: https://coveralls.io/github/domdfcoding/dom_toml?branch=master
    :alt: Coverage

.. |codefactor| image:: https://img.shields.io/codefactor/grade/github/domdfcoding/dom_toml?logo=codefactor
    :target: https://www.codefactor.io/repository/github/domdfcoding/dom_toml
    :alt: CodeFactor Grade

.. |pypi-version| image:: https://img.shields.io/pypi/v/dom_toml
    :target: https://pypi.org/project/dom_toml/
    :alt: PyPI - Package Version

.. |supported-versions| image:: https://img.shields.io/pypi/pyversions/dom_toml?logo=python&logoColor=white
    :target: https://pypi.org/project/dom_toml/
    :alt: PyPI - Supported Python Versions

.. |supported-implementations| image:: https://img.shields.io/pypi/implementation/dom_toml
    :target: https://pypi.org/project/dom_toml/
    :alt: PyPI - Supported Implementations

.. |wheel| image:: https://img.shields.io/pypi/wheel/dom_toml
    :target: https://pypi.org/project/dom_toml/
    :alt: PyPI - Wheel

.. |conda-version| image:: https://img.shields.io/conda/v/domdfcoding/dom_toml?logo=anaconda
    :target: https://anaconda.org/domdfcoding/dom_toml
    :alt: Conda - Package Version

.. |conda-platform| image:: https://img.shields.io/conda/pn/domdfcoding/dom_toml?label=conda%7Cplatform
    :target: https://anaconda.org/domdfcoding/dom_toml
    :alt: Conda - Platform

.. |license| image:: https://img.shields.io/github/license/domdfcoding/dom_toml
    :target: https://github.com/domdfcoding/dom_toml/blob/master/LICENSE
    :alt: License

.. |language| image:: https://img.shields.io/github/languages/top/domdfcoding/dom_toml
    :alt: GitHub top language

.. |commits-since| image:: https://img.shields.io/github/commits-since/domdfcoding/dom_toml/v2.1.0
    :target: https://github.com/domdfcoding/dom_toml/pulse
    :alt: GitHub commits since tagged version

.. |commits-latest| image:: https://img.shields.io/github/last-commit/domdfcoding/dom_toml
    :target: https://github.com/domdfcoding/dom_toml/commit/master
    :alt: GitHub last commit

.. |maintained| image:: https://img.shields.io/maintenance/yes/2025
    :alt: Maintenance

.. |pypi-downloads| image:: https://img.shields.io/pypi/dm/dom_toml
    :target: https://pypi.org/project/dom_toml/
    :alt: PyPI - Downloads

.. end shields

Installation
--------------

.. start installation

``dom_toml`` can be installed from PyPI or Anaconda.

To install with ``pip``:

.. code-block:: bash

    $ python -m pip install dom_toml

To install with ``conda``:

* First add the required channels

  .. code-block:: bash

      $ conda config --add channels https://conda.anaconda.org/conda-forge
      $ conda config --add channels https://conda.anaconda.org/domdfcoding

* Then install

  .. code-block:: bash

      $ conda install dom_toml

.. end installation
@@ -1,20 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for dom-toml
</title>
</head>
<body>
<h1>
Links for dom-toml
</h1>
<a href="/dom-toml/dom_toml-2.1.0-py3-none-any.whl#sha256=f5c4921940ad34b9e95d391f44000e001688b400fd21afb5cb430e8c29172da1" data-requires-python=">=3.7" data-dist-info-metadata="sha256=b4e42073be586c35660e3e9b398fef73d11ad42d2a429237e55003ba088436d6">
dom_toml-2.1.0-py3-none-any.whl
</a>
<br />
</body>
</html>
Binary file not shown.
@@ -1,209 +0,0 @@
Metadata-Version: 2.4
Name: domdf_python_tools
Version: 3.10.0
Summary: Helpful functions for Python 🐍 🛠️
Project-URL: Homepage, https://github.com/domdfcoding/domdf_python_tools
Project-URL: Issue Tracker, https://github.com/domdfcoding/domdf_python_tools/issues
Project-URL: Source Code, https://github.com/domdfcoding/domdf_python_tools
Project-URL: Documentation, https://domdf-python-tools.readthedocs.io/en/latest
Author-email: Dominic Davis-Foster <dominic@davis-foster.co.uk>
License: Copyright (c) 2019-2022 Dominic Davis-Foster

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
OR OTHER DEALINGS IN THE SOFTWARE.
License-File: LICENSE
Keywords: utilities
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Typing :: Typed
Requires-Python: >=3.6
Requires-Dist: importlib-metadata>=3.6.0; python_version < '3.9'
Requires-Dist: importlib-resources>=3.0.0; python_version < '3.9'
Requires-Dist: natsort>=7.0.1
Requires-Dist: typing-extensions>=3.7.4.1
Provides-Extra: all
Requires-Dist: pytz>=2019.1; extra == 'all'
Provides-Extra: dates
Requires-Dist: pytz>=2019.1; extra == 'dates'
Provides-Extra: testing
Description-Content-Type: text/x-rst

=====================
domdf_python_tools
=====================

.. start short_desc

**Helpful functions for Python 🐍 🛠️**

.. end short_desc

.. start shields

.. list-table::
    :stub-columns: 1
    :widths: 10 90

    * - Docs
      - |docs| |docs_check|
    * - Tests
      - |actions_linux| |actions_windows| |actions_macos| |coveralls|
    * - PyPI
      - |pypi-version| |supported-versions| |supported-implementations| |wheel|
    * - Anaconda
      - |conda-version| |conda-platform|
    * - Activity
      - |commits-latest| |commits-since| |maintained| |pypi-downloads|
    * - QA
      - |codefactor| |actions_flake8| |actions_mypy|
    * - Other
      - |license| |language| |requires|

.. |docs| image:: https://img.shields.io/readthedocs/domdf-python-tools/latest?logo=read-the-docs
    :target: https://domdf-python-tools.readthedocs.io/en/latest
    :alt: Documentation Build Status

.. |docs_check| image:: https://github.com/domdfcoding/domdf_python_tools/workflows/Docs%20Check/badge.svg
    :target: https://github.com/domdfcoding/domdf_python_tools/actions?query=workflow%3A%22Docs+Check%22
    :alt: Docs Check Status

.. |actions_linux| image:: https://github.com/domdfcoding/domdf_python_tools/workflows/Linux/badge.svg
    :target: https://github.com/domdfcoding/domdf_python_tools/actions?query=workflow%3A%22Linux%22
    :alt: Linux Test Status

.. |actions_windows| image:: https://github.com/domdfcoding/domdf_python_tools/workflows/Windows/badge.svg
    :target: https://github.com/domdfcoding/domdf_python_tools/actions?query=workflow%3A%22Windows%22
    :alt: Windows Test Status

.. |actions_macos| image:: https://github.com/domdfcoding/domdf_python_tools/workflows/macOS/badge.svg
    :target: https://github.com/domdfcoding/domdf_python_tools/actions?query=workflow%3A%22macOS%22
    :alt: macOS Test Status

.. |actions_flake8| image:: https://github.com/domdfcoding/domdf_python_tools/workflows/Flake8/badge.svg
    :target: https://github.com/domdfcoding/domdf_python_tools/actions?query=workflow%3A%22Flake8%22
    :alt: Flake8 Status

.. |actions_mypy| image:: https://github.com/domdfcoding/domdf_python_tools/workflows/mypy/badge.svg
    :target: https://github.com/domdfcoding/domdf_python_tools/actions?query=workflow%3A%22mypy%22
    :alt: mypy status

.. |requires| image:: https://dependency-dash.repo-helper.uk/github/domdfcoding/domdf_python_tools/badge.svg
    :target: https://dependency-dash.repo-helper.uk/github/domdfcoding/domdf_python_tools/
    :alt: Requirements Status

.. |coveralls| image:: https://img.shields.io/coveralls/github/domdfcoding/domdf_python_tools/master?logo=coveralls
    :target: https://coveralls.io/github/domdfcoding/domdf_python_tools?branch=master
    :alt: Coverage

.. |codefactor| image:: https://img.shields.io/codefactor/grade/github/domdfcoding/domdf_python_tools?logo=codefactor
    :target: https://www.codefactor.io/repository/github/domdfcoding/domdf_python_tools
    :alt: CodeFactor Grade

.. |pypi-version| image:: https://img.shields.io/pypi/v/domdf_python_tools
    :target: https://pypi.org/project/domdf_python_tools/
    :alt: PyPI - Package Version

.. |supported-versions| image:: https://img.shields.io/pypi/pyversions/domdf_python_tools?logo=python&logoColor=white
    :target: https://pypi.org/project/domdf_python_tools/
    :alt: PyPI - Supported Python Versions

.. |supported-implementations| image:: https://img.shields.io/pypi/implementation/domdf_python_tools
    :target: https://pypi.org/project/domdf_python_tools/
    :alt: PyPI - Supported Implementations

.. |wheel| image:: https://img.shields.io/pypi/wheel/domdf_python_tools
    :target: https://pypi.org/project/domdf_python_tools/
    :alt: PyPI - Wheel

.. |conda-version| image:: https://img.shields.io/conda/v/domdfcoding/domdf_python_tools?logo=anaconda
    :target: https://anaconda.org/domdfcoding/domdf_python_tools
    :alt: Conda - Package Version

.. |conda-platform| image:: https://img.shields.io/conda/pn/domdfcoding/domdf_python_tools?label=conda%7Cplatform
    :target: https://anaconda.org/domdfcoding/domdf_python_tools
    :alt: Conda - Platform

.. |license| image:: https://img.shields.io/github/license/domdfcoding/domdf_python_tools
    :target: https://github.com/domdfcoding/domdf_python_tools/blob/master/LICENSE
    :alt: License

.. |language| image:: https://img.shields.io/github/languages/top/domdfcoding/domdf_python_tools
    :alt: GitHub top language

.. |commits-since| image:: https://img.shields.io/github/commits-since/domdfcoding/domdf_python_tools/v3.10.0
    :target: https://github.com/domdfcoding/domdf_python_tools/pulse
    :alt: GitHub commits since tagged version

.. |commits-latest| image:: https://img.shields.io/github/last-commit/domdfcoding/domdf_python_tools
    :target: https://github.com/domdfcoding/domdf_python_tools/commit/master
    :alt: GitHub last commit

.. |maintained| image:: https://img.shields.io/maintenance/yes/2025
    :alt: Maintenance

.. |pypi-downloads| image:: https://img.shields.io/pypi/dm/domdf_python_tools
    :target: https://pypi.org/project/domdf_python_tools/
    :alt: PyPI - Downloads

.. end shields


**Note:** Before version 3 ``domdf_python_tools`` was licensed under the LGPLv3+.
Version 3 and later are licensed under the MIT License.


.. start installation

``domdf_python_tools`` can be installed from PyPI or Anaconda.

To install with ``pip``:

.. code-block:: bash

    $ python -m pip install domdf_python_tools

To install with ``conda``:

* First add the required channels

  .. code-block:: bash

      $ conda config --add channels https://conda.anaconda.org/conda-forge
      $ conda config --add channels https://conda.anaconda.org/domdfcoding

* Then install

  .. code-block:: bash

      $ conda install domdf_python_tools

.. end installation
@@ -1,20 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for domdf-python-tools
</title>
</head>
<body>
<h1>
Links for domdf-python-tools
</h1>
<a href="/domdf-python-tools/domdf_python_tools-3.10.0-py3-none-any.whl#sha256=5e71c1be71bbcc1f881d690c8984b60e64298ec256903b3147f068bc33090c36" data-requires-python=">=3.6" data-dist-info-metadata="sha256=7e6c54a34f6e96c6dd0659187c87f325e29b3edf2c81e4ba4bac990ca6b70d66">
domdf_python_tools-3.10.0-py3-none-any.whl
</a>
<br />
</body>
</html>
Binary file not shown.
@@ -1,187 +0,0 @@
Metadata-Version: 2.1
Name: handy-archives
Version: 0.2.0
Summary: Some handy archive helpers for Python.
Keywords: archive,shutil,tar,zip
Author-email: Dominic Davis-Foster <dominic@davis-foster.co.uk>
Requires-Python: >=3.6.1
Description-Content-Type: text/x-rst
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: System :: Archiving
Classifier: Topic :: System :: Archiving :: Compression
Classifier: Typing :: Typed
Requires-Dist: coincidence>=0.2.0 ; extra == "all"
Requires-Dist: pytest>=6.0.0 ; extra == "all"
Requires-Dist: coincidence>=0.2.0 ; extra == "testing"
Requires-Dist: pytest>=6.0.0 ; extra == "testing"
Project-URL: Documentation, https://handy-archives.readthedocs.io/en/latest
Project-URL: Homepage, https://github.com/domdfcoding/handy-archives
Project-URL: Issue Tracker, https://github.com/domdfcoding/handy-archives/issues
Project-URL: Source Code, https://github.com/domdfcoding/handy-archives
Provides-Extra: all
Provides-Extra: testing

===============
handy-archives
===============

.. start short_desc

**Some handy archive helpers for Python.**

.. end short_desc


.. start shields

.. list-table::
    :stub-columns: 1
    :widths: 10 90

    * - Docs
      - |docs| |docs_check|
    * - Tests
      - |actions_linux| |actions_windows| |actions_macos| |coveralls|
    * - PyPI
      - |pypi-version| |supported-versions| |supported-implementations| |wheel|
    * - Anaconda
      - |conda-version| |conda-platform|
    * - Activity
      - |commits-latest| |commits-since| |maintained| |pypi-downloads|
    * - QA
      - |codefactor| |actions_flake8| |actions_mypy|
    * - Other
      - |license| |language| |requires|

.. |docs| image:: https://img.shields.io/readthedocs/handy-archives/latest?logo=read-the-docs
    :target: https://handy-archives.readthedocs.io/en/latest
    :alt: Documentation Build Status

.. |docs_check| image:: https://github.com/domdfcoding/handy-archives/workflows/Docs%20Check/badge.svg
    :target: https://github.com/domdfcoding/handy-archives/actions?query=workflow%3A%22Docs+Check%22
    :alt: Docs Check Status

.. |actions_linux| image:: https://github.com/domdfcoding/handy-archives/workflows/Linux/badge.svg
    :target: https://github.com/domdfcoding/handy-archives/actions?query=workflow%3A%22Linux%22
    :alt: Linux Test Status

.. |actions_windows| image:: https://github.com/domdfcoding/handy-archives/workflows/Windows/badge.svg
    :target: https://github.com/domdfcoding/handy-archives/actions?query=workflow%3A%22Windows%22
    :alt: Windows Test Status

.. |actions_macos| image:: https://github.com/domdfcoding/handy-archives/workflows/macOS/badge.svg
    :target: https://github.com/domdfcoding/handy-archives/actions?query=workflow%3A%22macOS%22
    :alt: macOS Test Status

.. |actions_flake8| image:: https://github.com/domdfcoding/handy-archives/workflows/Flake8/badge.svg
    :target: https://github.com/domdfcoding/handy-archives/actions?query=workflow%3A%22Flake8%22
    :alt: Flake8 Status

.. |actions_mypy| image:: https://github.com/domdfcoding/handy-archives/workflows/mypy/badge.svg
    :target: https://github.com/domdfcoding/handy-archives/actions?query=workflow%3A%22mypy%22
    :alt: mypy status

.. |requires| image:: https://dependency-dash.repo-helper.uk/github/domdfcoding/handy-archives/badge.svg
    :target: https://dependency-dash.repo-helper.uk/github/domdfcoding/handy-archives/
    :alt: Requirements Status

.. |coveralls| image:: https://img.shields.io/coveralls/github/domdfcoding/handy-archives/master?logo=coveralls
    :target: https://coveralls.io/github/domdfcoding/handy-archives?branch=master
    :alt: Coverage

.. |codefactor| image:: https://img.shields.io/codefactor/grade/github/domdfcoding/handy-archives?logo=codefactor
    :target: https://www.codefactor.io/repository/github/domdfcoding/handy-archives
    :alt: CodeFactor Grade

.. |pypi-version| image:: https://img.shields.io/pypi/v/handy-archives
    :target: https://pypi.org/project/handy-archives/
    :alt: PyPI - Package Version

.. |supported-versions| image:: https://img.shields.io/pypi/pyversions/handy-archives?logo=python&logoColor=white
    :target: https://pypi.org/project/handy-archives/
    :alt: PyPI - Supported Python Versions

.. |supported-implementations| image:: https://img.shields.io/pypi/implementation/handy-archives
    :target: https://pypi.org/project/handy-archives/
    :alt: PyPI - Supported Implementations

.. |wheel| image:: https://img.shields.io/pypi/wheel/handy-archives
    :target: https://pypi.org/project/handy-archives/
    :alt: PyPI - Wheel

.. |conda-version| image:: https://img.shields.io/conda/v/domdfcoding/handy-archives?logo=anaconda
    :target: https://anaconda.org/domdfcoding/handy-archives
    :alt: Conda - Package Version

.. |conda-platform| image:: https://img.shields.io/conda/pn/domdfcoding/handy-archives?label=conda%7Cplatform
    :target: https://anaconda.org/domdfcoding/handy-archives
    :alt: Conda - Platform

.. |license| image:: https://img.shields.io/github/license/domdfcoding/handy-archives
    :target: https://github.com/domdfcoding/handy-archives/blob/master/LICENSE
    :alt: License

.. |language| image:: https://img.shields.io/github/languages/top/domdfcoding/handy-archives
    :alt: GitHub top language

.. |commits-since| image:: https://img.shields.io/github/commits-since/domdfcoding/handy-archives/v0.2.0
    :target: https://github.com/domdfcoding/handy-archives/pulse
    :alt: GitHub commits since tagged version

.. |commits-latest| image:: https://img.shields.io/github/last-commit/domdfcoding/handy-archives
    :target: https://github.com/domdfcoding/handy-archives/commit/master
    :alt: GitHub last commit

.. |maintained| image:: https://img.shields.io/maintenance/yes/2023
    :alt: Maintenance

.. |pypi-downloads| image:: https://img.shields.io/pypi/dm/handy-archives
    :target: https://pypi.org/project/handy-archives/
    :alt: PyPI - Downloads

.. end shields

Installation
--------------

.. start installation

``handy-archives`` can be installed from PyPI or Anaconda.

To install with ``pip``:

.. code-block:: bash

    $ python -m pip install handy-archives

To install with ``conda``:

* First add the required channels

  .. code-block:: bash

      $ conda config --add channels https://conda.anaconda.org/conda-forge
      $ conda config --add channels https://conda.anaconda.org/domdfcoding

* Then install

  .. code-block:: bash

      $ conda install handy-archives

.. end installation
@@ -1,20 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for handy-archives
</title>
</head>
<body>
<h1>
Links for handy-archives
</h1>
<a href="/handy-archives/handy_archives-0.2.0-py3-none-any.whl#sha256=8495e08f3cd1c452fe65570db1869db07f709149f85c7e9cd8f3f461df436318" data-requires-python=">=3.6.1" data-dist-info-metadata="sha256=1c8ed778cedfb20d025aff1e14f2f5f42de88c881ac96493f0a337be3f48078e">
handy_archives-0.2.0-py3-none-any.whl
</a>
<br />
</body>
</html>
Binary file not shown.
@@ -1,230 +0,0 @@
Metadata-Version: 2.1
Name: hatch-requirements-txt
Version: 0.4.1
Summary: Hatchling plugin to read project dependencies from requirements.txt
Project-URL: Homepage, https://github.com/repo-helper/hatch-requirements-txt
Project-URL: Issue Tracker, https://github.com/repo-helper/hatch-requirements-txt/issues
Project-URL: Source Code, https://github.com/repo-helper/hatch-requirements-txt
Author-email: Dominic Davis-Foster <dominic@davis-foster.co.uk>
License: Copyright (c) 2022 Dominic Davis-Foster

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
OR OTHER DEALINGS IN THE SOFTWARE.
License-File: LICENSE
Keywords: dependencies,hatch,requirements
Classifier: Development Status :: 4 - Beta
Classifier: Framework :: Hatch
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: System :: Archiving :: Packaging
Classifier: Typing :: Typed
Requires-Python: >=3.6.1
Requires-Dist: hatchling>=0.21.0
Requires-Dist: packaging>=21.3
Description-Content-Type: text/x-rst

=======================
hatch-requirements-txt
=======================

.. start short_desc

**Hatchling plugin to read project dependencies from requirements.txt**

.. end short_desc


.. list-table::
    :stub-columns: 1
    :widths: 10 90

    * - Tests
      - |actions_linux| |actions_windows| |actions_macos| |coveralls|
    * - PyPI
      - |pypi-version| |supported-versions| |supported-implementations| |wheel|
    * - Anaconda
      - |conda-version| |conda-platform|
    * - Activity
      - |commits-latest| |commits-since| |maintained| |pypi-downloads|
    * - QA
      - |codefactor| |actions_flake8| |actions_mypy|
    * - Other
      - |hatch| |license| |language| |requires|

.. |actions_linux| image:: https://github.com/repo-helper/hatch-requirements-txt/workflows/Linux/badge.svg
    :target: https://github.com/repo-helper/hatch-requirements-txt/actions?query=workflow%3A%22Linux%22
    :alt: Linux Test Status

.. |actions_windows| image:: https://github.com/repo-helper/hatch-requirements-txt/workflows/Windows/badge.svg
    :target: https://github.com/repo-helper/hatch-requirements-txt/actions?query=workflow%3A%22Windows%22
    :alt: Windows Test Status

.. |actions_macos| image:: https://github.com/repo-helper/hatch-requirements-txt/workflows/macOS/badge.svg
    :target: https://github.com/repo-helper/hatch-requirements-txt/actions?query=workflow%3A%22macOS%22
    :alt: macOS Test Status

.. |actions_flake8| image:: https://github.com/repo-helper/hatch-requirements-txt/workflows/Flake8/badge.svg
    :target: https://github.com/repo-helper/hatch-requirements-txt/actions?query=workflow%3A%22Flake8%22
    :alt: Flake8 Status

.. |actions_mypy| image:: https://github.com/repo-helper/hatch-requirements-txt/workflows/mypy/badge.svg
    :target: https://github.com/repo-helper/hatch-requirements-txt/actions?query=workflow%3A%22mypy%22
    :alt: mypy status

.. |requires| image:: https://dependency-dash.repo-helper.uk/github/repo-helper/hatch-requirements-txt/badge.svg
    :target: https://dependency-dash.repo-helper.uk/github/repo-helper/hatch-requirements-txt/
    :alt: Requirements Status

.. |coveralls| image:: https://img.shields.io/coveralls/github/repo-helper/hatch-requirements-txt/master?logo=coveralls
    :target: https://coveralls.io/github/repo-helper/hatch-requirements-txt?branch=master
    :alt: Coverage

.. |codefactor| image:: https://img.shields.io/codefactor/grade/github/repo-helper/hatch-requirements-txt?logo=codefactor
    :target: https://www.codefactor.io/repository/github/repo-helper/hatch-requirements-txt
    :alt: CodeFactor Grade

.. |pypi-version| image:: https://img.shields.io/pypi/v/hatch-requirements-txt
    :target: https://pypi.org/project/hatch-requirements-txt/
    :alt: PyPI - Package Version

.. |supported-versions| image:: https://img.shields.io/pypi/pyversions/hatch-requirements-txt?logo=python&logoColor=white
    :target: https://pypi.org/project/hatch-requirements-txt/
    :alt: PyPI - Supported Python Versions

.. |supported-implementations| image:: https://img.shields.io/pypi/implementation/hatch-requirements-txt
    :target: https://pypi.org/project/hatch-requirements-txt/
    :alt: PyPI - Supported Implementations

.. |wheel| image:: https://img.shields.io/pypi/wheel/hatch-requirements-txt
    :target: https://pypi.org/project/hatch-requirements-txt/
    :alt: PyPI - Wheel

.. |conda-version| image:: https://img.shields.io/conda/v/conda-forge/hatch-requirements-txt?logo=anaconda
    :target: https://anaconda.org/conda-forge/hatch-requirements-txt
    :alt: Conda - Package Version

.. |conda-platform| image:: https://img.shields.io/conda/pn/conda-forge/hatch-requirements-txt?label=conda%7Cplatform
    :target: https://anaconda.org/conda-forge/hatch-requirements-txt
    :alt: Conda - Platform

.. |hatch| image:: https://img.shields.io/badge/%F0%9F%A5%9A-Hatch-4051b5.svg
    :target: https://github.com/pypa/hatch
    :alt: Hatch project

.. |license| image:: https://img.shields.io/github/license/repo-helper/hatch-requirements-txt
    :target: https://github.com/repo-helper/hatch-requirements-txt/blob/master/LICENSE
    :alt: License

.. |language| image:: https://img.shields.io/github/languages/top/repo-helper/hatch-requirements-txt
    :alt: GitHub top language

.. |commits-since| image:: https://img.shields.io/github/commits-since/repo-helper/hatch-requirements-txt/v0.4.1
    :target: https://github.com/repo-helper/hatch-requirements-txt/pulse
    :alt: GitHub commits since tagged version

.. |commits-latest| image:: https://img.shields.io/github/last-commit/repo-helper/hatch-requirements-txt
    :target: https://github.com/repo-helper/hatch-requirements-txt/commit/master
    :alt: GitHub last commit

.. |maintained| image:: https://img.shields.io/maintenance/yes/2023
    :alt: Maintenance

.. |pypi-downloads| image:: https://img.shields.io/pypi/dm/hatch-requirements-txt
    :target: https://pypi.org/project/hatch-requirements-txt/
    :alt: PyPI - Downloads


Usage
========

In your ``pyproject.toml`` make the following changes:

* Append ``hatch-requirements-txt`` to ``build-system.requires``.
* Append ``"dependencies"`` to ``project.dynamic``.
* Add the following table:

.. code-block:: toml

    [tool.hatch.metadata.hooks.requirements_txt]
    files = ["requirements.txt"]

The resulting ``pyproject.toml`` should look something like:

.. code-block:: toml

    [build-system]
    requires = ["hatchling", "hatch-requirements-txt"]
    build-backend = "hatchling.build"

    [project]
    name = "my-project"
    version = "1.0.0"
    dynamic = ["dependencies"]

    [tool.hatch.metadata.hooks.requirements_txt]
    files = ["requirements.txt"]

You can also define groups of `optional dependencies <https://hatch.pypa.io/latest/config/dependency/#features>`_
(also known as "features") by appending ``optional-dependencies`` to ``project.dynamic`` and adding a table like:

.. code-block:: toml

    [tool.hatch.metadata.hooks.requirements_txt.optional-dependencies]
    crypto = ["requirements-crypto.txt"]
    fastjson = ["requirements-fastjson.txt"]
    cli = ["requirements-cli.txt"]


Requirements file format
============================

``hatch-requirements-txt`` only supports a subset of the ``requirements.txt`` format_ supported by ``pip``.
The following are supported:

* requirement specifiers, per `PEP 508`_
* Comments, prefixed with a ``#``.
* ``--<option>`` options, both on their own line and after a requirement specifier.
  Note however that the options themselves are ignored.

The following are unsupported within ``requirements.txt`` files:

* Editable install commands with the ``-e`` option,
* References to other requirements or constraints files with the ``-r`` or ``-c`` options.
* References to paths on the local filesystem, or URLs.

**TL;DR**
For best compatibility, ensure all lines in your ``requirements.txt`` files
are valid PEP 508 requirements, or comments starting with a ``#``.

.. _format: https://pip.pypa.io/en/stable/reference/requirements-file-format/
.. _PEP 508: https://peps.python.org/pep-0508/
@@ -1,20 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for hatch-requirements-txt
</title>
</head>
<body>
<h1>
Links for hatch-requirements-txt
</h1>
<a href="/hatch-requirements-txt/hatch_requirements_txt-0.4.1-py3-none-any.whl#sha256=13c6ab7707cbc0aba128b3626dacae4be7065f1dbafc792461c9f547b3c3ffde" data-requires-python=">=3.6.1" data-dist-info-metadata="sha256=363835e529855acbb3f275b7c7fbb79901e5b4ac2b336a75765b33db5cb6f333">
hatch_requirements_txt-0.4.1-py3-none-any.whl
</a>
<br />
</body>
</html>
Binary file not shown.
@@ -1,6 +1,6 @@
Metadata-Version: 2.4
Name: hatchling
Version: 1.28.0
Version: 1.27.0
Summary: Modern, extensible Python build backend
Project-URL: Homepage, https://hatch.pypa.io/latest/
Project-URL: Sponsor, https://github.com/sponsors/ofek
@@ -13,18 +13,19 @@ License-File: LICENSE.txt
Keywords: build,hatch,packaging
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Natural Language :: English
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Build Tools
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Requires-Python: >=3.10
Requires-Python: >=3.8
Requires-Dist: packaging>=24.2
Requires-Dist: pathspec>=0.10.1
Requires-Dist: pluggy>=1.0.0
@@ -41,7 +42,7 @@ Description-Content-Type: text/markdown
| | |
| --- | --- |
| Package | [](https://pypi.org/project/hatchling/) [](https://pypi.org/project/hatchling/) [](https://pypi.org/project/hatchling/) |
| Meta | [](https://github.com/pypa/hatch) [](https://github.com/astral-sh/ruff) [](https://github.com/psf/black) [](https://github.com/python/mypy) [](https://spdx.org/licenses/) [](https://github.com/sponsors/ofek) |
| Meta | [](https://github.com/pypa/hatch) [](https://github.com/astral-sh/ruff) [](https://github.com/psf/black) [](https://github.com/python/mypy) [](https://spdx.org/licenses/) [](https://github.com/sponsors/ofek) |

</div>

Binary file not shown.
@@ -12,8 +12,8 @@
<h1>
Links for hatchling
</h1>
<a href="/hatchling/hatchling-1.28.0-py3-none-any.whl#sha256=dc48722b68b3f4bbfa3ff618ca07cdea6750e7d03481289ffa8be1521d18a961" data-requires-python=">=3.10" data-dist-info-metadata="sha256=acd3b755437aa07859ebe3ed9b71d94536fa183cc851bbd95b437ba3fe7e09b6">
hatchling-1.28.0-py3-none-any.whl
<a href="/hatchling/hatchling-1.27.0-py3-none-any.whl#sha256=d3a2f3567c4f926ea39849cdf924c7e99e6686c9c8e288ae1037c8fa2a5d937b" data-requires-python=">=3.8" data-dist-info-metadata="sha256=a224ea37d2658f75ccf569e61d849ff7e9297cdac2396022278e82a47b9fed3e">
hatchling-1.27.0-py3-none-any.whl
</a>
<br />
</body>
@@ -37,18 +37,6 @@
aiosqlite
</a>
<br />
<a href="/airium/">
airium
</a>
<br />
<a href="/apeye/">
apeye
</a>
<br />
<a href="/apeye-core/">
apeye-core
</a>
<br />
<a href="/asyncua/">
asyncua
</a>
@@ -73,34 +61,18 @@
calver
</a>
<br />
<a href="/certifi/">
certifi
</a>
<br />
<a href="/cffi/">
cffi
</a>
<br />
<a href="/charset-normalizer/">
charset-normalizer
</a>
<br />
<a href="/choreographer/">
choreographer
</a>
<br />
<a href="/click/">
click
</a>
<br />
<a href="/cloudpickle/">
cloudpickle
</a>
<br />
<a href="/consolekit/">
consolekit
</a>
<br />
<a href="/cryptography/">
cryptography
</a>
@@ -109,26 +81,6 @@
Cython
</a>
<br />
<a href="/deprecation/">
deprecation
</a>
<br />
<a href="/deprecation-alias/">
deprecation-alias
</a>
<br />
<a href="/dist-meta/">
dist-meta
</a>
<br />
<a href="/dom-toml/">
dom_toml
</a>
<br />
<a href="/domdf-python-tools/">
domdf_python_tools
</a>
<br />
<a href="/expandvars/">
expandvars
</a>
@@ -157,18 +109,10 @@
greenlet
</a>
<br />
<a href="/handy-archives/">
handy-archives
</a>
<br />
<a href="/hatch-fancy-pypi-readme/">
hatch-fancy-pypi-readme
</a>
<br />
<a href="/hatch-requirements-txt/">
hatch-requirements-txt
</a>
<br />
<a href="/hatch-vcs/">
hatch-vcs
</a>
@@ -221,18 +165,10 @@
meson-python
</a>
<br />
<a href="/mistletoe/">
mistletoe
</a>
<br />
<a href="/multidict/">
multidict
</a>
<br />
<a href="/natsort/">
natsort
</a>
<br />
<a href="/networkx/">
networkx
</a>
@@ -269,10 +205,6 @@
pkgconfig
</a>
<br />
<a href="/platformdirs/">
platformdirs
</a>
<br />
<a href="/pluggy/">
pluggy
</a>
@@ -329,10 +261,6 @@
pyproject-metadata
</a>
<br />
<a href="/pyproject-parser/">
pyproject-parser
</a>
<br />
<a href="/pytest/">
pytest
</a>
@@ -357,10 +285,6 @@
redis
</a>
<br />
<a href="/requests/">
requests
</a>
<br />
<a href="/s3fs/">
s3fs
</a>
@@ -405,14 +329,6 @@
shap
</a>
<br />
<a href="/shippinglabel/">
shippinglabel
</a>
<br />
<a href="/simple503/">
simple503
</a>
<br />
<a href="/simplejson/">
simplejson
</a>
@@ -473,10 +389,6 @@
wheel
</a>
<br />
<a href="/whey/">
whey
</a>
<br />
<a href="/wrapt/">
wrapt
</a>
@@ -1,20 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for mistletoe
</title>
</head>
<body>
<h1>
Links for mistletoe
</h1>
<a href="/mistletoe/mistletoe-1.5.0-py3-none-any.whl#sha256=d4e77b991b998c5efe3c4eab4e1b472263b5349688acd50d79bd9a6c317a9df1" data-requires-python="~=3.5" data-dist-info-metadata="sha256=4bcb1dceef05f4cbfde4c90c9c9cd88836dcef7511b70914f63f2245b51c635f">
mistletoe-1.5.0-py3-none-any.whl
</a>
<br />
</body>
</html>
Binary file not shown.
@@ -1,307 +0,0 @@
Metadata-Version: 2.4
Name: mistletoe
Version: 1.5.0
Summary: A fast, extensible Markdown parser in pure Python.
Home-page: https://github.com/miyuchina/mistletoe
Author: Mi Yu
Author-email: hello@afteryu.me
License: MIT
Keywords: markdown lexer parser development
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.5
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: Text Processing :: Markup :: Markdown
Requires-Python: ~=3.5
Description-Content-Type: text/markdown
License-File: LICENSE
Dynamic: author
Dynamic: author-email
Dynamic: classifier
Dynamic: description
Dynamic: description-content-type
Dynamic: home-page
Dynamic: keywords
Dynamic: license
Dynamic: license-file
Dynamic: requires-python
Dynamic: summary

<h1>mistletoe<img src='https://cdn.rawgit.com/miyuchina/mistletoe/master/resources/logo.svg' align='right' width='128' height='128'></h1>

[![Build Status][build-badge]][github-actions]
[![Coverage Status][cover-badge]][coveralls]
[![PyPI][pypi-badge]][pypi]
[![is wheel][wheel-badge]][pypi]

mistletoe is a Markdown parser in pure Python,
designed to be fast, spec-compliant and fully customizable.

Apart from being the fastest
CommonMark-compliant Markdown parser implementation in pure Python,
mistletoe also supports easy definitions of custom tokens.
Parsing Markdown into an abstract syntax tree
also allows us to swap out renderers for different output formats,
without touching any of the core components.

Remember to spell mistletoe in lowercase!

Features
--------
* **Fast**:
  mistletoe is the fastest implementation of CommonMark in Python.
  See the [performance][performance] section for details.

* **Spec-compliant**:
  CommonMark is [a useful, high-quality project][oilshell].
  mistletoe follows the [CommonMark specification][commonmark]
  to resolve ambiguities during parsing.
  Outputs are predictable and well-defined.

* **Extensible**:
  Strikethrough and tables are supported natively,
  and custom block-level and span-level tokens can easily be added
  (see the sketch after this list).
  Writing a new renderer for mistletoe is a relatively
  trivial task.

  You can even write [a Lisp][scheme] in it.

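To make the custom-token claim concrete, here is a minimal sketch modeled on
the `GithubWiki` walkthrough in mistletoe's developer guide; the `WikiLink`
token and its `[[ text | target ]]` syntax are invented for illustration and
are not shipped with the package:

```python
import re

from mistletoe import Document
from mistletoe.html_renderer import HtmlRenderer
from mistletoe.span_token import SpanToken


class WikiLink(SpanToken):
    """Span token for a hypothetical [[ text | target ]] wiki-link syntax."""
    pattern = re.compile(r"\[\[ *(.+?) *\| *(.+?) *\]\]")

    def __init__(self, match):
        self.target = match.group(2)  # group 1 is parsed as the inner content


class WikiLinkRenderer(HtmlRenderer):
    def __init__(self):
        super().__init__(WikiLink)  # register the extra span token

    def render_wiki_link(self, token):
        return '<a href="{}">{}</a>'.format(token.target, self.render_inner(token))


with WikiLinkRenderer() as renderer:
    print(renderer.render(Document(["See [[ the docs | docs.md ]] first.\n"])))
```

Registering the token through the renderer constructor keeps the core parser
untouched: mistletoe derives the dispatch method name (`render_wiki_link`)
by snake-casing the token class name.
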
Output formats
--------------

Renderers for the following "core" output formats exist within the mistletoe
main package:

* HTML
* LaTeX
* AST (Abstract Syntax Tree; handy for debugging the parsing process)
* Markdown (Can be used to reflow the text, or make other types of automated
  changes to Markdown documents)

Renderers for the following output formats can be found
in the [contrib][contrib] package:

* HTML with MathJax (_mathjax.py_)
* HTML with code highlighting (using Pygments) (_pygments\_renderer.py_)
* HTML with TOC (for programmatic use) (_toc\_renderer.py_)
* HTML with support for GitHub wiki links (_github\_wiki.py_)
* Jira Markdown (_jira\_renderer.py_)
* XWiki Syntax (_xwiki20\_renderer.py_)
* Scheme (_scheme.py_)

Installation
------------
mistletoe is tested for Python 3.5 and above. Install mistletoe with pip:

```sh
pip3 install mistletoe
```

Alternatively, clone the repo:

```sh
git clone https://github.com/miyuchina/mistletoe.git
cd mistletoe
pip3 install -e .
```

This installs mistletoe in "editable" mode (because of the `-e` option).
That means that any changes made to the source code become visible
immediately - that's because Python only makes a link to the specified
directory (`.`) instead of copying the files to the standard packages
folder.

See the [contributing][contributing] doc for how to contribute to mistletoe.

Usage
-----

### Usage from Python

Here's how you can use mistletoe in a Python script:

```python
import mistletoe

with open('foo.md', 'r') as fin:
    rendered = mistletoe.markdown(fin)
```

`mistletoe.markdown()` uses mistletoe's default settings: allowing HTML mixins
and rendering to HTML. The function also accepts an additional argument
`renderer`. To produce LaTeX output:

```python
import mistletoe
from mistletoe.latex_renderer import LaTeXRenderer

with open('foo.md', 'r') as fin:
    rendered = mistletoe.markdown(fin, LaTeXRenderer)
```

To reflow the text in a Markdown document with a max line length of 20 characters:

```python
import mistletoe
from mistletoe.markdown_renderer import MarkdownRenderer

with open('dev-guide.md', 'r') as fin:
    with MarkdownRenderer(max_line_length=20) as renderer:
        print(renderer.render(mistletoe.Document(fin)))
```

Finally, here's how you would manually specify extra tokens via a renderer.
In the following example, we use `HtmlRenderer` to render
the AST. The renderer itself adds `HtmlBlock` and `HtmlSpan` tokens to the parsing
process. The result should be equal to the output obtained from
the first example above.

```python
from mistletoe import Document, HtmlRenderer

with open('foo.md', 'r') as fin:
    with HtmlRenderer() as renderer:  # or: `with HtmlRenderer(AnotherToken1, AnotherToken2) as renderer:`
        doc = Document(fin)              # parse the lines into AST
        rendered = renderer.render(doc)  # render the AST
    # internal lists of tokens to be parsed are automatically reset when exiting this `with` block
```

**Important**: As can be seen from the example above,
the parsing phase is currently tightly connected with initialization
and closing of a renderer. Therefore, you should never call `Document(...)`
outside of a `with ... as renderer` block, unless you know what you are doing.

### Usage from command-line

pip installation enables mistletoe's command-line utility. Type the following
directly into your shell:

```sh
mistletoe foo.md
```

This will transpile `foo.md` into HTML, and dump the output to stdout. To save
the HTML, direct the output into a file:

```sh
mistletoe foo.md > out.html
```

You can use a different renderer by including the full path to the renderer
class after a `-r` or `--renderer` flag. For example, to transpile into
LaTeX:

```sh
mistletoe foo.md --renderer mistletoe.latex_renderer.LaTeXRenderer
```

and similarly for a renderer in the contrib package:

```sh
mistletoe foo.md --renderer mistletoe.contrib.jira_renderer.JiraRenderer
```


### mistletoe interactive mode

Running `mistletoe` without specifying a file will land you in interactive
mode. Like Python's REPL, interactive mode allows you to test how your
Markdown will be interpreted by mistletoe:

```html
mistletoe [version 0.7.2] (interactive)
Type Ctrl-D to complete input, or Ctrl-C to exit.
>>> some **bold** text
... and some *italics*
...
<p>some <strong>bold</strong> text
and some <em>italics</em></p>
>>>
```

The interactive mode also accepts the `--renderer` flag:

```latex
mistletoe [version 0.7.2] (interactive)
Type Ctrl-D to complete input, or Ctrl-C to exit.
Using renderer: LaTeXRenderer
>>> some **bold** text
... and some *italics*
...
\documentclass{article}
\begin{document}

some \textbf{bold} text
and some \textit{italics}
\end{document}
>>>
```

Who uses mistletoe?
-------------------

mistletoe is used by projects with various target audiences.
You can find some concrete projects in the "Used by" section
on [Libraries.io][libraries-mistletoe], but this is definitely not a complete
list.
A list of [Dependents][github-dependents] is also tracked directly by GitHub.

### Run mistletoe from CopyQ

One notable example is running mistletoe as a Markdown converter from the
advanced clipboard manager called [CopyQ][copyq]. One just needs to install
the [Convert Markdown to ...][copyq-convert-md] custom script command
and then run this command on any selected Markdown text.

Why mistletoe?
--------------

"For fun," says David Beazley.

Further reading
---------------

* [Performance][performance]
* [Developer's Guide](dev-guide.md)

Copyright & License
-------------------
* mistletoe's logo uses artwork by [Freepik][icon], under
  [CC BY 3.0][cc-by].
* mistletoe is released under [MIT][license].

[build-badge]: https://img.shields.io/github/actions/workflow/status/miyuchina/mistletoe/python-package.yml?style=flat-square
[cover-badge]: https://img.shields.io/coveralls/miyuchina/mistletoe.svg?style=flat-square
[pypi-badge]: https://img.shields.io/pypi/v/mistletoe.svg?style=flat-square
[wheel-badge]: https://img.shields.io/pypi/wheel/mistletoe.svg?style=flat-square
[github-actions]: https://github.com/miyuchina/mistletoe/actions/workflows/python-package.yml
[coveralls]: https://coveralls.io/github/miyuchina/mistletoe?branch=master
[pypi]: https://pypi.python.org/pypi/mistletoe
[mistune]: https://github.com/lepture/mistune
[python-markdown]: https://github.com/waylan/Python-Markdown
[python-markdown2]: https://github.com/trentm/python-markdown2
[commonmark-py]: https://github.com/rtfd/CommonMark-py
[performance]: performance.md
[oilshell]: https://www.oilshell.org/blog/2018/02/14.html
[commonmark]: https://spec.commonmark.org/
[contrib]: https://github.com/miyuchina/mistletoe/tree/master/mistletoe/contrib
[scheme]: https://github.com/miyuchina/mistletoe/blob/master/mistletoe/contrib/scheme.py
[contributing]: CONTRIBUTING.md
[icon]: https://www.freepik.com
[cc-by]: https://creativecommons.org/licenses/by/3.0/us/
[license]: LICENSE
[pythonpath]: https://stackoverflow.com/questions/16107526/how-to-flexibly-change-pythonpath
[libraries-mistletoe]: https://libraries.io/pypi/mistletoe
[copyq]: https://hluk.github.io/CopyQ/
[copyq-convert-md]: https://github.com/hluk/copyq-commands/tree/master/Global#convert-markdown-to-
[github-dependents]: https://github.com/miyuchina/mistletoe/network/dependents

@@ -1,20 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for natsort
</title>
</head>
<body>
<h1>
Links for natsort
</h1>
<a href="/natsort/natsort-8.4.0-py3-none-any.whl#sha256=4732914fb471f56b5cce04d7bae6f164a592c7712e1c85f9ef585e197299521c" data-requires-python=">=3.7" data-dist-info-metadata="sha256=fe0934ce644ef39d034647b3280cc7e8afeaad92732ca70eb5f9e946db65f4a3">
natsort-8.4.0-py3-none-any.whl
</a>
<br />
</body>
</html>
Binary file not shown.

@@ -1,548 +0,0 @@
Metadata-Version: 2.1
Name: natsort
Version: 8.4.0
Summary: Simple yet flexible natural sorting in Python.
Home-page: https://github.com/SethMMorton/natsort
Author: Seth M. Morton
Author-email: drtuba78@gmail.com
License: MIT
Platform: UNKNOWN
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Science/Research
Classifier: Intended Audience :: System Administrators
Classifier: Intended Audience :: Information Technology
Classifier: Intended Audience :: Financial and Insurance Industry
Classifier: Operating System :: OS Independent
Classifier: License :: OSI Approved :: MIT License
Classifier: Natural Language :: English
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Topic :: Scientific/Engineering :: Information Analysis
Classifier: Topic :: Utilities
Classifier: Topic :: Text Processing
Requires-Python: >=3.7
Description-Content-Type: text/x-rst
License-File: LICENSE
Provides-Extra: fast
Requires-Dist: fastnumbers (>=2.0.0) ; extra == 'fast'
Provides-Extra: icu
Requires-Dist: PyICU (>=1.0.0) ; extra == 'icu'

natsort
=======

.. image:: https://img.shields.io/pypi/v/natsort.svg
    :target: https://pypi.org/project/natsort/

.. image:: https://img.shields.io/pypi/pyversions/natsort.svg
    :target: https://pypi.org/project/natsort/

.. image:: https://img.shields.io/pypi/l/natsort.svg
    :target: https://github.com/SethMMorton/natsort/blob/main/LICENSE

.. image:: https://github.com/SethMMorton/natsort/workflows/Tests/badge.svg
    :target: https://github.com/SethMMorton/natsort/actions

.. image:: https://codecov.io/gh/SethMMorton/natsort/branch/main/graph/badge.svg
    :target: https://codecov.io/gh/SethMMorton/natsort

.. image:: https://img.shields.io/pypi/dw/natsort.svg
    :target: https://pypi.org/project/natsort/

Simple yet flexible natural sorting in Python.

- Source Code: https://github.com/SethMMorton/natsort
- Downloads: https://pypi.org/project/natsort/
- Documentation: https://natsort.readthedocs.io/

  - `Examples and Recipes`_
  - `How Does Natsort Work?`_
  - `API`_

- `Quick Description`_
- `Quick Examples`_
- `FAQ`_
- `Requirements`_
- `Optional Dependencies`_
- `Installation`_
- `How to Run Tests`_
- `How to Build Documentation`_
- `Dropped Deprecated APIs`_
- `History`_

**NOTE**: Please see the `Dropped Deprecated APIs`_ section for changes.

Quick Description
-----------------

When you try to sort a list of strings that contain numbers, the normal python
sort algorithm sorts lexicographically, so you might not get the results that
you expect:

.. code-block:: pycon

    >>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in']
    >>> sorted(a)
    ['1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '2 ft 7 in', '7 ft 6 in']

Notice that it has the order ('1', '10', '2') - this is because the list is
being sorted in lexicographical order, which sorts numbers like you would
letters (i.e. 'b', 'ba', 'c').

`natsort`_ provides a function `natsorted()`_ that helps sort lists
"naturally" ("naturally" is rather ill-defined, but in general it means
sorting based on meaning and not computer code point).
Using `natsorted()`_ is simple:

.. code-block:: pycon

    >>> from natsort import natsorted
    >>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in']
    >>> natsorted(a)
    ['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in']

`natsorted()`_ identifies numbers anywhere in a string and sorts them
naturally. Below are some other things you can do with `natsort`_
(also see the `Examples and Recipes`_ for a quick start guide, or the
`API`_ for complete details).

**Note**: `natsorted()`_ is designed to be a drop-in replacement for the
built-in `sorted()`_ function. Like `sorted()`_, `natsorted()`_
`does not sort in-place`. To sort a list and assign the output to the same
variable, you must explicitly assign the output to a variable:

.. code-block:: pycon

    >>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in']
    >>> natsorted(a)
    ['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in']
    >>> print(a)  # 'a' was not sorted; "natsorted" simply returned a sorted list
    ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in']
    >>> a = natsorted(a)  # Now 'a' will be sorted because the sorted list was assigned to 'a'
    >>> print(a)
    ['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in']

Please see `Generating a Reusable Sorting Key and Sorting In-Place`_ for
an alternate way to sort in-place naturally.

Quick Examples
--------------

- `Sorting Versions`_
- `Sort Paths Like My File Browser (e.g. Windows Explorer on Windows)`_
- `Sorting by Real Numbers (i.e. Signed Floats)`_
- `Locale-Aware Sorting (or "Human Sorting")`_
- `Further Customizing Natsort`_
- `Sorting Mixed Types`_
- `Handling Bytes`_
- `Generating a Reusable Sorting Key and Sorting In-Place`_
- `Other Useful Things`_

Sorting Versions
++++++++++++++++

`natsort`_ does not actually *comprehend* version numbers.
It just so happens that the most common versioning schemes are designed to
work with standard natural sorting techniques; these schemes include
``MAJOR.MINOR``, ``MAJOR.MINOR.PATCH``, ``YEAR.MONTH.DAY``. If your data
conforms to a scheme like this, then it will work out-of-the-box with
`natsorted()`_ (as of `natsort`_ version >= 4.0.0):

.. code-block:: pycon

    >>> a = ['version-1.9', 'version-2.0', 'version-1.11', 'version-1.10']
    >>> natsorted(a)
    ['version-1.9', 'version-1.10', 'version-1.11', 'version-2.0']

If you need to sort versions that use a more complicated scheme, please see
`these version sorting examples`_.

Sort Paths Like My File Browser (e.g. Windows Explorer on Windows)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Prior to `natsort`_ version 7.1.0, it was a common request to be able to
sort paths like Windows Explorer. As of `natsort`_ 7.1.0, the function
`os_sorted()`_ has been added to provide users the ability to sort
in the order that their file browser might sort (e.g. Windows Explorer on
Windows, Finder on MacOS, Dolphin/Nautilus/Thunar/etc. on Linux).

.. code-block:: python

    import os
    from natsort import os_sorted
    print(os_sorted(os.listdir()))
    # The directory sorted like your file browser might show

Output will be different depending on the operating system you are on.

For users **not** on Windows (e.g. MacOS/Linux) it is **strongly** recommended
to also install `PyICU`_, which will help
`natsort`_ give results that match most file browsers. If this is not installed,
it will fall back on Python's built-in `locale`_ module and will give good
results for most input, but will give poor results for special characters.

Sorting by Real Numbers (i.e. Signed Floats)
++++++++++++++++++++++++++++++++++++++++++++

This is useful in scientific data analysis (and was the default behavior
of `natsorted()`_ for `natsort`_ version < 4.0.0). Use the `realsorted()`_
function:

.. code-block:: pycon

    >>> from natsort import realsorted, ns
    >>> # Note that when interpreting as signed floats, the below numbers are
    >>> # +5.10, -3.00, +5.30, +2.00
    >>> a = ['position5.10.data', 'position-3.data', 'position5.3.data', 'position2.data']
    >>> natsorted(a)
    ['position2.data', 'position5.3.data', 'position5.10.data', 'position-3.data']
    >>> natsorted(a, alg=ns.REAL)
    ['position-3.data', 'position2.data', 'position5.10.data', 'position5.3.data']
    >>> realsorted(a)  # shortcut for natsorted with alg=ns.REAL
    ['position-3.data', 'position2.data', 'position5.10.data', 'position5.3.data']

Locale-Aware Sorting (or "Human Sorting")
+++++++++++++++++++++++++++++++++++++++++

This is where the non-numeric characters are also ordered based on their
meaning, not on their ordinal value, and a locale-dependent thousands
separator and decimal separator is accounted for in the number.
This can be achieved with the `humansorted()`_ function:

.. code-block:: pycon

    >>> a = ['Apple', 'apple15', 'Banana', 'apple14,689', 'banana']
    >>> natsorted(a)
    ['Apple', 'Banana', 'apple14,689', 'apple15', 'banana']
    >>> import locale
    >>> locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
    'en_US.UTF-8'
    >>> natsorted(a, alg=ns.LOCALE)
    ['apple15', 'apple14,689', 'Apple', 'banana', 'Banana']
    >>> from natsort import humansorted
    >>> humansorted(a)  # shortcut for natsorted with alg=ns.LOCALE
    ['apple15', 'apple14,689', 'Apple', 'banana', 'Banana']

You may find you need to explicitly set the locale to get this to work
(as shown in the example). Please see `locale issues`_ and the
`Optional Dependencies`_ section below before using the `humansorted()`_ function.

Further Customizing Natsort
+++++++++++++++++++++++++++

If you need to combine multiple algorithm modifiers (such as ``ns.REAL``,
``ns.LOCALE``, and ``ns.IGNORECASE``), you can combine the options using the
bitwise OR operator (``|``). For example,

.. code-block:: pycon

    >>> a = ['Apple', 'apple15', 'Banana', 'apple14,689', 'banana']
    >>> natsorted(a, alg=ns.REAL | ns.LOCALE | ns.IGNORECASE)
    ['Apple', 'apple15', 'apple14,689', 'Banana', 'banana']
    >>> # The ns enum provides long and short forms for each option.
    >>> ns.LOCALE == ns.L
    True
    >>> # You can also customize the convenience functions, too.
    >>> natsorted(a, alg=ns.REAL | ns.LOCALE | ns.IGNORECASE) == realsorted(a, alg=ns.L | ns.IC)
    True
    >>> natsorted(a, alg=ns.REAL | ns.LOCALE | ns.IGNORECASE) == humansorted(a, alg=ns.R | ns.IC)
    True

All of the available customizations can be found in the documentation for
`the ns enum`_.

You can also add your own custom transformation functions with the ``key``
argument. These can be used with ``alg`` if you wish.

.. code-block:: pycon

    >>> a = ['apple2.50', '2.3apple']
    >>> natsorted(a, key=lambda x: x.replace('apple', ''), alg=ns.REAL)
    ['2.3apple', 'apple2.50']

Sorting Mixed Types
+++++++++++++++++++

You can mix and match `int`_, `float`_, and `str`_ types when you sort:

.. code-block:: pycon

    >>> a = ['4.5', 6, 2.0, '5', 'a']
    >>> natsorted(a)
    [2.0, '4.5', '5', 6, 'a']
    >>> # sorted(a) would raise an "unorderable types" TypeError

Handling Bytes
++++++++++++++

`natsort`_ does not officially support the `bytes`_ type, but
convenience functions are provided that help you decode to `str`_ first:

.. code-block:: pycon

    >>> from natsort import as_utf8
    >>> a = [b'a', 14.0, 'b']
    >>> # natsorted(a) would raise a TypeError (bytes() < str())
    >>> natsorted(a, key=as_utf8) == [14.0, b'a', 'b']
    True
    >>> a = [b'a56', b'a5', b'a6', b'a40']
    >>> # natsorted(a) would return the same results as sorted(a)
    >>> natsorted(a, key=as_utf8) == [b'a5', b'a6', b'a40', b'a56']
    True

Generating a Reusable Sorting Key and Sorting In-Place
++++++++++++++++++++++++++++++++++++++++++++++++++++++

Under the hood, `natsorted()`_ works by generating a custom sorting
key using `natsort_keygen()`_ and then passes that to the built-in
`sorted()`_. You can use the `natsort_keygen()`_ function yourself to
generate a custom sorting key to sort in-place using the `list.sort()`_
method.

.. code-block:: pycon

    >>> from natsort import natsort_keygen
    >>> natsort_key = natsort_keygen()
    >>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in']
    >>> natsorted(a) == sorted(a, key=natsort_key)
    True
    >>> a.sort(key=natsort_key)
    >>> a
    ['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in']

All of the algorithm customizations mentioned in the
`Further Customizing Natsort`_ section can also be applied to
`natsort_keygen()`_ through the *alg* keyword option.

Other Useful Things
+++++++++++++++++++

- recursively descend into lists of lists
- automatic unicode normalization of input data
- `controlling the case-sensitivity`_
- `sorting file paths correctly`_
- `allow custom sorting keys`_
- `accounting for units`_

FAQ
---

How do I debug `natsorted()`_?
    The best way to debug `natsorted()`_ is to generate a key using `natsort_keygen()`_
    with the same options being passed to `natsorted()`_. One can take a look at
    exactly what is being done with their input using this key - it is highly
    recommended to `look at this issue describing how to debug`_ for *how* to debug,
    and also to review the `How Does Natsort Work?`_ page for *why* `natsort`_ is
    doing that to your data.

    If you are trying to sort custom classes and running into trouble, please
    take a look at https://github.com/SethMMorton/natsort/issues/60. In short,
    custom classes are not likely to be sorted correctly if one relies
    on the behavior of ``__lt__`` and the other rich comparison operators in
    their custom class - it is better to use a ``key`` function with
    `natsort`_, or use the `natsort`_ key as part of your rich comparison
    operator definition, as sketched below.
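
    As a hedged sketch of that second approach (the ``Version`` class is
    invented here for illustration; only ``natsort_keygen()`` comes from
    `natsort`_):

    .. code-block:: python

        from natsort import natsort_keygen

        _natsort_key = natsort_keygen()  # build one reusable key function

        class Version:
            def __init__(self, label):
                self.label = label

            def __lt__(self, other):
                # Delegate ordering to the natsort key so that, e.g.,
                # Version("1.9") < Version("1.10") holds as expected.
                return _natsort_key(self.label) < _natsort_key(other.label)
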
`natsort`_ gave me results I didn't expect, and it's a terrible library!
    Did you try to debug using the above advice? If so, and you still cannot figure out
    the error, then please `file an issue`_.

How *does* `natsort`_ work?
    If you don't want to read `How Does Natsort Work?`_,
    here is a quick primer.

    `natsort`_ provides a `key function`_ that can be passed to `list.sort()`_
    or `sorted()`_ in order to modify the default sorting behavior. This key
    is generated on-demand with the key generator `natsort_keygen()`_.
    `natsorted()`_ is essentially a wrapper for the following code:

    .. code-block:: pycon

        >>> from natsort import natsort_keygen
        >>> natsort_key = natsort_keygen()
        >>> sorted(['1', '10', '2'], key=natsort_key)
        ['1', '2', '10']

    Users can further customize `natsort`_ sorting behavior with the ``key``
    and/or ``alg`` options (see details in the `Further Customizing Natsort`_
    section).

    The key generated by `natsort_keygen()`_ *always* returns a `tuple`_. It
    does so in the following way (*some details omitted for clarity*):

    1. Assume the input is a string, and attempt to split it into numbers and
       non-numbers using regular expressions. Numbers are then converted into
       either `int`_ or `float`_.
    2. If the above fails because the input is not a string, assume the input
       is some other sequence (e.g. `list`_ or `tuple`_), and recursively
       apply the key to each element of the sequence.
    3. If the above fails because the input is not iterable, assume the input
       is an `int`_ or `float`_, and just return the input in a `tuple`_.

    Because a `tuple`_ is always returned, a `TypeError`_ should not be common
    unless one tries to do something odd like sort an `int`_ against a `list`_.
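
    To see this for yourself, you can call the generated key directly. A
    hedged illustration (the exact shape of the returned tuple is an
    implementation detail of `natsort`_ and may vary between versions):

    .. code-block:: pycon

        >>> from natsort import natsort_keygen
        >>> natsort_key = natsort_keygen()
        >>> natsort_key('version-1.10')  # output shape is indicative only
        ('version-', 1, '.', 10)
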
Shell script
------------

`natsort`_ comes with a shell script called `natsort`_, or can also be called
from the command line with ``python -m natsort``. Check out the
`shell script wiki documentation`_ for more details.

Requirements
------------

`natsort`_ requires Python 3.7 or greater.

Optional Dependencies
---------------------

fastnumbers
+++++++++++

The most efficient sorting can occur if you install the
`fastnumbers`_ package
(version >=2.0.0); it helps with the string to number conversions.
`natsort`_ will still run (efficiently) without the package, but if you need
to squeeze out that extra juice it is recommended you include this as a
dependency. `natsort`_ will not require (or check) that
`fastnumbers`_ is installed at installation.

PyICU
+++++

It is recommended that you install `PyICU`_ if you wish to sort in a
locale-dependent manner, see this page on `locale issues`_ for an explanation why.

Installation
------------

Use ``pip``!

.. code-block:: console

    $ pip install natsort

If you want to install the `Optional Dependencies`_, you can use the
`"extras" notation`_ at installation time to install those dependencies as
well - use ``fast`` for `fastnumbers`_ and ``icu`` for `PyICU`_.

.. code-block:: console

    # Install both optional dependencies.
    $ pip install natsort[fast,icu]
    # Install just fastnumbers
    $ pip install natsort[fast]

How to Run Tests
----------------

Please note that `natsort`_ is NOT set-up to support ``python setup.py test``.

The recommended way to run tests is with `tox`_. After installing ``tox``,
running tests is as simple as executing the following in the `natsort`_ directory:

.. code-block:: console

    $ tox

``tox`` will create a virtual environment for your tests and install
all the needed testing requirements for you. You can specify a particular
python version with the ``-e`` flag, e.g. ``tox -e py36``. Static analysis
is done with ``tox -e flake8``. You can see all available testing environments
with ``tox --listenvs``.

How to Build Documentation
--------------------------

If you want to build the documentation for `natsort`_, it is recommended to
use ``tox``:

.. code-block:: console

    $ tox -e docs

This will place the documentation in ``build/sphinx/html``.

Dropped Deprecated APIs
-----------------------

In `natsort`_ version 6.0.0, the following APIs and functions were removed:

- ``number_type`` keyword argument (deprecated since 3.4.0)
- ``signed`` keyword argument (deprecated since 3.4.0)
- ``exp`` keyword argument (deprecated since 3.4.0)
- ``as_path`` keyword argument (deprecated since 3.4.0)
- ``py3_safe`` keyword argument (deprecated since 3.4.0)
- ``ns.TYPESAFE`` (deprecated since version 5.0.0)
- ``ns.DIGIT`` (deprecated since version 5.0.0)
- ``ns.VERSION`` (deprecated since version 5.0.0)
- ``versorted()`` (discouraged since version 4.0.0,
  officially deprecated since version 5.5.0)
- ``index_versorted()`` (discouraged since version 4.0.0,
  officially deprecated since version 5.5.0)

In general, if you want to determine if you are using deprecated APIs you
can run your code with the following flag:

.. code-block:: console

    $ python -Wdefault::DeprecationWarning my-code.py

By default `DeprecationWarnings`_ are not shown, but this will cause them
to be shown. Alternatively, you can just set the environment variable
``PYTHONWARNINGS`` to "default::DeprecationWarning" and then run your code.

Author
------

Seth M. Morton

History
-------

Please visit the changelog `on GitHub`_.

.. _natsort: https://natsort.readthedocs.io/en/stable/index.html
.. _natsorted(): https://natsort.readthedocs.io/en/stable/api.html#natsort.natsorted
.. _natsort_keygen(): https://natsort.readthedocs.io/en/stable/api.html#natsort.natsort_keygen
.. _realsorted(): https://natsort.readthedocs.io/en/stable/api.html#natsort.realsorted
.. _humansorted(): https://natsort.readthedocs.io/en/stable/api.html#natsort.humansorted
.. _os_sorted(): https://natsort.readthedocs.io/en/stable/api.html#natsort.os_sorted
.. _the ns enum: https://natsort.readthedocs.io/en/stable/api.html#natsort.ns
.. _fastnumbers: https://github.com/SethMMorton/fastnumbers
.. _sorted(): https://docs.python.org/3/library/functions.html#sorted
.. _list.sort(): https://docs.python.org/3/library/stdtypes.html#list.sort
.. _key function: https://docs.python.org/3/howto/sorting.html#key-functions
.. _locale: https://docs.python.org/3/library/locale.html
.. _int: https://docs.python.org/3/library/functions.html#int
.. _float: https://docs.python.org/3/library/functions.html#float
.. _str: https://docs.python.org/3/library/stdtypes.html#str
.. _bytes: https://docs.python.org/3/library/stdtypes.html#bytes
.. _list: https://docs.python.org/3/library/stdtypes.html#list
.. _tuple: https://docs.python.org/3/library/stdtypes.html#tuple
.. _TypeError: https://docs.python.org/3/library/exceptions.html#TypeError
.. _DeprecationWarnings: https://docs.python.org/3/library/exceptions.html#DeprecationWarning
.. _"extras" notation: https://packaging.python.org/tutorials/installing-packages/#installing-setuptools-extras
.. _PyICU: https://pypi.org/project/PyICU
.. _tox: https://tox.readthedocs.io/en/latest/
.. _Examples and Recipes: https://github.com/SethMMorton/natsort/wiki/Examples-and-Recipes
.. _How Does Natsort Work?: https://github.com/SethMMorton/natsort/wiki/How-Does-Natsort-Work%3F
.. _API: https://natsort.readthedocs.io/en/stable/api.html
.. _on GitHub: https://github.com/SethMMorton/natsort/blob/main/CHANGELOG.md
.. _file an issue: https://github.com/SethMMorton/natsort/issues/new
.. _look at this issue describing how to debug: https://github.com/SethMMorton/natsort/issues/13#issuecomment-50422375
.. _controlling the case-sensitivity: https://github.com/SethMMorton/natsort/wiki/Examples-and-Recipes#controlling-case-when-sorting
.. _sorting file paths correctly: https://github.com/SethMMorton/natsort/wiki/Examples-and-Recipes#sort-os-generated-paths
.. _allow custom sorting keys: https://github.com/SethMMorton/natsort/wiki/Examples-and-Recipes#using-a-custom-sorting-key
.. _accounting for units: https://github.com/SethMMorton/natsort/wiki/Examples-and-Recipes#accounting-for-units-when-sorting
.. _these version sorting examples: https://github.com/SethMMorton/natsort/wiki/Examples-and-Recipes#sorting-more-expressive-versioning-schemes
.. _locale issues: https://github.com/SethMMorton/natsort/wiki/Possible-Issues-with-natsort.humansorted-or-ns.LOCALE
.. _shell script wiki documentation: https://github.com/SethMMorton/natsort/wiki/Shell-Script

@@ -1,20 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for platformdirs
</title>
</head>
<body>
<h1>
Links for platformdirs
</h1>
<a href="/platformdirs/platformdirs-4.5.0-py3-none-any.whl#sha256=e578a81bb873cbb89a41fcc904c7ef523cc18284b7e3b3ccf06aca1403b7ebd3" data-requires-python=">=3.10" data-dist-info-metadata="sha256=985c5997a43e7ceda709d5960894f858eaf8a7b535da3657e25936e8ca86cb5a">
platformdirs-4.5.0-py3-none-any.whl
</a>
<br />
</body>
</html>
Binary file not shown.

@@ -1,350 +0,0 @@
Metadata-Version: 2.4
Name: platformdirs
Version: 4.5.0
Summary: A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`.
Project-URL: Changelog, https://github.com/tox-dev/platformdirs/releases
Project-URL: Documentation, https://platformdirs.readthedocs.io
Project-URL: Homepage, https://github.com/tox-dev/platformdirs
Project-URL: Source, https://github.com/tox-dev/platformdirs
Project-URL: Tracker, https://github.com/tox-dev/platformdirs/issues
Maintainer-email: Bernát Gábor <gaborjbernat@gmail.com>, Julian Berman <Julian@GrayVines.com>, Ofek Lev <oss@ofek.dev>, Ronny Pfannschmidt <opensource@ronnypfannschmidt.de>
License-Expression: MIT
License-File: LICENSE
Keywords: appdirs,application,cache,directory,log,user
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Requires-Python: >=3.10
Provides-Extra: docs
Requires-Dist: furo>=2025.9.25; extra == 'docs'
Requires-Dist: proselint>=0.14; extra == 'docs'
Requires-Dist: sphinx-autodoc-typehints>=3.2; extra == 'docs'
Requires-Dist: sphinx>=8.2.3; extra == 'docs'
Provides-Extra: test
Requires-Dist: appdirs==1.4.4; extra == 'test'
Requires-Dist: covdefaults>=2.3; extra == 'test'
Requires-Dist: pytest-cov>=7; extra == 'test'
Requires-Dist: pytest-mock>=3.15.1; extra == 'test'
Requires-Dist: pytest>=8.4.2; extra == 'test'
Provides-Extra: type
Requires-Dist: mypy>=1.18.2; extra == 'type'
Description-Content-Type: text/x-rst

The problem
===========

.. image:: https://badge.fury.io/py/platformdirs.svg
    :target: https://badge.fury.io/py/platformdirs
.. image:: https://img.shields.io/pypi/pyversions/platformdirs.svg
    :target: https://pypi.python.org/pypi/platformdirs/
.. image:: https://github.com/tox-dev/platformdirs/actions/workflows/check.yaml/badge.svg
    :target: https://github.com/platformdirs/platformdirs/actions
.. image:: https://static.pepy.tech/badge/platformdirs/month
    :target: https://pepy.tech/project/platformdirs

When writing desktop applications, finding the right location to store user data
and configuration varies per platform. Even for single-platform apps, there
may be plenty of nuances in figuring out the right location.

For example, if running on macOS, you should use::

    ~/Library/Application Support/<AppName>

If on Windows (at least English Win) that should be::

    C:\Users\<User>\Application Data\Local Settings\<AppAuthor>\<AppName>

or possibly::

    C:\Users\<User>\Application Data\<AppAuthor>\<AppName>

for `roaming profiles <https://docs.microsoft.com/en-us/previous-versions/windows/it-pro/windows-vista/cc766489(v=ws.10)>`_ but that is another story.

On Linux (and other Unices), according to the `XDG Basedir Spec`_, it should be::

    ~/.local/share/<AppName>

.. _XDG Basedir Spec: https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html

``platformdirs`` to the rescue
==============================

This kind of thing is what the ``platformdirs`` package is for.
``platformdirs`` will help you choose an appropriate:

- user data dir (``user_data_dir``)
- user config dir (``user_config_dir``)
- user cache dir (``user_cache_dir``)
- site data dir (``site_data_dir``)
- site config dir (``site_config_dir``)
- user log dir (``user_log_dir``)
- user documents dir (``user_documents_dir``)
- user downloads dir (``user_downloads_dir``)
- user pictures dir (``user_pictures_dir``)
- user videos dir (``user_videos_dir``)
- user music dir (``user_music_dir``)
- user desktop dir (``user_desktop_dir``)
- user runtime dir (``user_runtime_dir``)

And also:

- Is slightly opinionated on the directory names used. Look for "OPINION" in
  documentation and code for when an opinion is being applied.

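As a quick, hedged sketch of putting these functions to work (``SuperApp`` and
``Acme`` are placeholder names; the ``user_config_path`` variant returns a
``pathlib.Path``, and the ``ensure_exists`` keyword asks platformdirs to create
the directory if it is missing):

.. code-block:: python

    from platformdirs import user_config_path

    # Resolve the per-user config directory and create it on first use.
    cfg_dir = user_config_path("SuperApp", "Acme", ensure_exists=True)

    settings_file = cfg_dir / "settings.toml"
    if settings_file.exists():
        config_text = settings_file.read_text()
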
Example output
==============

On macOS:

.. code-block:: pycon

    >>> from platformdirs import *
    >>> appname = "SuperApp"
    >>> appauthor = "Acme"
    >>> user_data_dir(appname, appauthor)
    '/Users/trentm/Library/Application Support/SuperApp'
    >>> user_config_dir(appname, appauthor)
    '/Users/trentm/Library/Application Support/SuperApp'
    >>> user_cache_dir(appname, appauthor)
    '/Users/trentm/Library/Caches/SuperApp'
    >>> site_data_dir(appname, appauthor)
    '/Library/Application Support/SuperApp'
    >>> site_config_dir(appname, appauthor)
    '/Library/Application Support/SuperApp'
    >>> user_log_dir(appname, appauthor)
    '/Users/trentm/Library/Logs/SuperApp'
    >>> user_documents_dir()
    '/Users/trentm/Documents'
    >>> user_downloads_dir()
    '/Users/trentm/Downloads'
    >>> user_pictures_dir()
    '/Users/trentm/Pictures'
    >>> user_videos_dir()
    '/Users/trentm/Movies'
    >>> user_music_dir()
    '/Users/trentm/Music'
    >>> user_desktop_dir()
    '/Users/trentm/Desktop'
    >>> user_runtime_dir(appname, appauthor)
    '/Users/trentm/Library/Caches/TemporaryItems/SuperApp'

On Windows:

.. code-block:: pycon

    >>> from platformdirs import *
    >>> appname = "SuperApp"
    >>> appauthor = "Acme"
    >>> user_data_dir(appname, appauthor)
    'C:\\Users\\trentm\\AppData\\Local\\Acme\\SuperApp'
    >>> user_data_dir(appname, appauthor, roaming=True)
    'C:\\Users\\trentm\\AppData\\Roaming\\Acme\\SuperApp'
    >>> user_config_dir(appname, appauthor)
    'C:\\Users\\trentm\\AppData\\Local\\Acme\\SuperApp'
    >>> user_cache_dir(appname, appauthor)
    'C:\\Users\\trentm\\AppData\\Local\\Acme\\SuperApp\\Cache'
    >>> site_data_dir(appname, appauthor)
    'C:\\ProgramData\\Acme\\SuperApp'
    >>> site_config_dir(appname, appauthor)
    'C:\\ProgramData\\Acme\\SuperApp'
    >>> user_log_dir(appname, appauthor)
    'C:\\Users\\trentm\\AppData\\Local\\Acme\\SuperApp\\Logs'
    >>> user_documents_dir()
    'C:\\Users\\trentm\\Documents'
    >>> user_downloads_dir()
    'C:\\Users\\trentm\\Downloads'
    >>> user_pictures_dir()
    'C:\\Users\\trentm\\Pictures'
    >>> user_videos_dir()
    'C:\\Users\\trentm\\Videos'
    >>> user_music_dir()
    'C:\\Users\\trentm\\Music'
    >>> user_desktop_dir()
    'C:\\Users\\trentm\\Desktop'
    >>> user_runtime_dir(appname, appauthor)
    'C:\\Users\\trentm\\AppData\\Local\\Temp\\Acme\\SuperApp'

On Linux:

.. code-block:: pycon

    >>> from platformdirs import *
    >>> appname = "SuperApp"
    >>> appauthor = "Acme"
    >>> user_data_dir(appname, appauthor)
    '/home/trentm/.local/share/SuperApp'
    >>> user_config_dir(appname)
    '/home/trentm/.config/SuperApp'
    >>> user_cache_dir(appname, appauthor)
    '/home/trentm/.cache/SuperApp'
    >>> site_data_dir(appname, appauthor)
    '/usr/local/share/SuperApp'
    >>> site_data_dir(appname, appauthor, multipath=True)
    '/usr/local/share/SuperApp:/usr/share/SuperApp'
    >>> site_config_dir(appname)
    '/etc/xdg/SuperApp'
    >>> os.environ["XDG_CONFIG_DIRS"] = "/etc:/usr/local/etc"
    >>> site_config_dir(appname, multipath=True)
    '/etc/SuperApp:/usr/local/etc/SuperApp'
    >>> user_log_dir(appname, appauthor)
    '/home/trentm/.local/state/SuperApp/log'
    >>> user_documents_dir()
    '/home/trentm/Documents'
    >>> user_downloads_dir()
    '/home/trentm/Downloads'
    >>> user_pictures_dir()
    '/home/trentm/Pictures'
    >>> user_videos_dir()
    '/home/trentm/Videos'
    >>> user_music_dir()
    '/home/trentm/Music'
    >>> user_desktop_dir()
    '/home/trentm/Desktop'
    >>> user_runtime_dir(appname, appauthor)
    '/run/user/{os.getuid()}/SuperApp'

On Android::

    >>> from platformdirs import *
    >>> appname = "SuperApp"
    >>> appauthor = "Acme"
    >>> user_data_dir(appname, appauthor)
    '/data/data/com.myApp/files/SuperApp'
    >>> user_config_dir(appname)
    '/data/data/com.myApp/shared_prefs/SuperApp'
    >>> user_cache_dir(appname, appauthor)
    '/data/data/com.myApp/cache/SuperApp'
    >>> site_data_dir(appname, appauthor)
    '/data/data/com.myApp/files/SuperApp'
    >>> site_config_dir(appname)
    '/data/data/com.myApp/shared_prefs/SuperApp'
    >>> user_log_dir(appname, appauthor)
    '/data/data/com.myApp/cache/SuperApp/log'
    >>> user_documents_dir()
    '/storage/emulated/0/Documents'
    >>> user_downloads_dir()
    '/storage/emulated/0/Downloads'
    >>> user_pictures_dir()
    '/storage/emulated/0/Pictures'
    >>> user_videos_dir()
    '/storage/emulated/0/DCIM/Camera'
    >>> user_music_dir()
    '/storage/emulated/0/Music'
    >>> user_desktop_dir()
    '/storage/emulated/0/Desktop'
    >>> user_runtime_dir(appname, appauthor)
    '/data/data/com.myApp/cache/SuperApp/tmp'

Note: Some Android apps, like Termux and Pydroid, are used as shells: the
end user runs them to emulate a Linux environment. ``platformdirs`` uses the
presence of the ``SHELL`` environment variable to differentiate between
general Android apps and Android apps used as shells. Shell Android apps
also support the ``XDG_*`` environment variables.


``PlatformDirs`` for convenience
================================

.. code-block:: pycon

    >>> from platformdirs import PlatformDirs
    >>> dirs = PlatformDirs("SuperApp", "Acme")
    >>> dirs.user_data_dir
    '/Users/trentm/Library/Application Support/SuperApp'
    >>> dirs.user_config_dir
    '/Users/trentm/Library/Application Support/SuperApp'
    >>> dirs.user_cache_dir
    '/Users/trentm/Library/Caches/SuperApp'
    >>> dirs.site_data_dir
    '/Library/Application Support/SuperApp'
    >>> dirs.site_config_dir
    '/Library/Application Support/SuperApp'
    >>> dirs.user_cache_dir
    '/Users/trentm/Library/Caches/SuperApp'
    >>> dirs.user_log_dir
    '/Users/trentm/Library/Logs/SuperApp'
    >>> dirs.user_documents_dir
    '/Users/trentm/Documents'
    >>> dirs.user_downloads_dir
    '/Users/trentm/Downloads'
    >>> dirs.user_pictures_dir
    '/Users/trentm/Pictures'
    >>> dirs.user_videos_dir
    '/Users/trentm/Movies'
    >>> dirs.user_music_dir
    '/Users/trentm/Music'
    >>> dirs.user_desktop_dir
    '/Users/trentm/Desktop'
    >>> dirs.user_runtime_dir
    '/Users/trentm/Library/Caches/TemporaryItems/SuperApp'

Per-version isolation
=====================

If you have multiple versions of your app in use that you want to be
able to run side-by-side, then you may want version-isolation for these
dirs::

    >>> from platformdirs import PlatformDirs
    >>> dirs = PlatformDirs("SuperApp", "Acme", version="1.0")
    >>> dirs.user_data_dir
    '/Users/trentm/Library/Application Support/SuperApp/1.0'
    >>> dirs.user_config_dir
    '/Users/trentm/Library/Application Support/SuperApp/1.0'
    >>> dirs.user_cache_dir
    '/Users/trentm/Library/Caches/SuperApp/1.0'
    >>> dirs.site_data_dir
    '/Library/Application Support/SuperApp/1.0'
    >>> dirs.site_config_dir
    '/Library/Application Support/SuperApp/1.0'
    >>> dirs.user_log_dir
    '/Users/trentm/Library/Logs/SuperApp/1.0'
    >>> dirs.user_documents_dir
    '/Users/trentm/Documents'
    >>> dirs.user_downloads_dir
    '/Users/trentm/Downloads'
    >>> dirs.user_pictures_dir
    '/Users/trentm/Pictures'
    >>> dirs.user_videos_dir
    '/Users/trentm/Movies'
    >>> dirs.user_music_dir
    '/Users/trentm/Music'
    >>> dirs.user_desktop_dir
    '/Users/trentm/Desktop'
    >>> dirs.user_runtime_dir
    '/Users/trentm/Library/Caches/TemporaryItems/SuperApp/1.0'

Be wary of using this for configuration files though; you'll need to handle
migrating configuration files manually.

Why this Fork?
==============

This repository is a friendly fork of the wonderful work started by
`ActiveState <https://github.com/ActiveState/appdirs>`_ who created
``appdirs``, this package's ancestor.

Maintaining an open source project is no easy task, particularly
from within an organization, and the Python community is indebted
to ``appdirs`` (and to Trent Mick and Jeff Rouse in particular) for
creating an incredibly useful, simple module, as evidenced by the wide
number of users it has attracted over the years.

Nonetheless, given the number of long-standing open issues
and pull requests, and no clear path towards `ensuring
that maintenance of the package would continue or grow
<https://github.com/ActiveState/appdirs/issues/79>`_, this fork was
created.

Contributions are most welcome.

@@ -1,20 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="generator" content="simple503 version 0.4.0" />
<meta name="pypi:repository-version" content="1.0" />
<meta charset="UTF-8" />
<title>
Links for pyproject-parser
</title>
</head>
<body>
<h1>
Links for pyproject-parser
</h1>
<a href="/pyproject-parser/pyproject_parser-0.13.0-py3-none-any.whl#sha256=e967a66e84ade12497016af86f25a60d0fdb8c8e6e4bae0dc9d3707aeaf03bb5" data-requires-python=">=3.6.1" data-dist-info-metadata="sha256=367990763f992d283c2e5399d1342a04362cd8ac807c7739abacad041cc4c440">
pyproject_parser-0.13.0-py3-none-any.whl
</a>
<br />
</body>
</html>
Binary file not shown.

@@ -1,219 +0,0 @@
Metadata-Version: 2.4
Name: pyproject-parser
Version: 0.13.0
Summary: Parser for 'pyproject.toml'
Project-URL: Homepage, https://github.com/repo-helper/pyproject-parser
Project-URL: Issue Tracker, https://github.com/repo-helper/pyproject-parser/issues
Project-URL: Source Code, https://github.com/repo-helper/pyproject-parser
Project-URL: Documentation, https://pyproject-parser.readthedocs.io/en/latest
Author-email: Dominic Davis-Foster <dominic@davis-foster.co.uk>
License: Copyright (c) 2021 Dominic Davis-Foster

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
OR OTHER DEALINGS IN THE SOFTWARE.
License-File: LICENSE
Keywords: metadata,packaging,pep518,pep621,pyproject,toml
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: System :: Archiving :: Packaging
Classifier: Typing :: Typed
Requires-Python: >=3.6.1
Requires-Dist: apeye-core>=1.0.0
Requires-Dist: attrs>=20.3.0
Requires-Dist: dom-toml>=2.0.0
Requires-Dist: domdf-python-tools>=2.8.0
Requires-Dist: natsort>=7.1.1
Requires-Dist: packaging>=20.9
Requires-Dist: shippinglabel>=1.0.0
Requires-Dist: typing-extensions!=4.7.0,>=3.7.4.3
Provides-Extra: all
Requires-Dist: click>=7.1.2; extra == 'all'
Requires-Dist: consolekit>=1.4.1; extra == 'all'
Requires-Dist: docutils>=0.16; extra == 'all'
Requires-Dist: readme-renderer[md]>=27.0; extra == 'all'
Requires-Dist: sdjson>=0.3.1; extra == 'all'
Provides-Extra: cli
Requires-Dist: click>=7.1.2; extra == 'cli'
Requires-Dist: consolekit>=1.4.1; extra == 'cli'
Requires-Dist: sdjson>=0.3.1; extra == 'cli'
Provides-Extra: readme
Requires-Dist: docutils>=0.16; extra == 'readme'
Requires-Dist: readme-renderer[md]>=27.0; extra == 'readme'
Description-Content-Type: text/x-rst

#################
pyproject-parser
#################

.. start short_desc

**Parser for 'pyproject.toml'**

.. end short_desc
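
As a brief, hedged usage sketch (``PyProject.load`` is the documented entry
point; the field access below assumes a PEP 621 ``[project]`` table is present
and is illustrative only):

.. code-block:: python

    from pyproject_parser import PyProject

    # Parse an existing pyproject.toml from disk.
    project = PyProject.load("pyproject.toml")

    # PEP 621 metadata is available when a [project] table is defined.
    if project.project is not None:
        print(project.project["name"])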

.. start shields

.. list-table::
    :stub-columns: 1
    :widths: 10 90

    * - Docs
      - |docs| |docs_check|
    * - Tests
      - |actions_linux| |actions_windows| |actions_macos| |coveralls|
    * - PyPI
      - |pypi-version| |supported-versions| |supported-implementations| |wheel|
    * - Anaconda
      - |conda-version| |conda-platform|
    * - Activity
      - |commits-latest| |commits-since| |maintained| |pypi-downloads|
    * - QA
      - |codefactor| |actions_flake8| |actions_mypy|
    * - Other
      - |license| |language| |requires|

.. |docs| image:: https://img.shields.io/readthedocs/pyproject-parser/latest?logo=read-the-docs
    :target: https://pyproject-parser.readthedocs.io/en/latest
    :alt: Documentation Build Status

.. |docs_check| image:: https://github.com/repo-helper/pyproject-parser/workflows/Docs%20Check/badge.svg
    :target: https://github.com/repo-helper/pyproject-parser/actions?query=workflow%3A%22Docs+Check%22
    :alt: Docs Check Status

.. |actions_linux| image:: https://github.com/repo-helper/pyproject-parser/workflows/Linux/badge.svg
    :target: https://github.com/repo-helper/pyproject-parser/actions?query=workflow%3A%22Linux%22
    :alt: Linux Test Status

.. |actions_windows| image:: https://github.com/repo-helper/pyproject-parser/workflows/Windows/badge.svg
    :target: https://github.com/repo-helper/pyproject-parser/actions?query=workflow%3A%22Windows%22
    :alt: Windows Test Status

.. |actions_macos| image:: https://github.com/repo-helper/pyproject-parser/workflows/macOS/badge.svg
    :target: https://github.com/repo-helper/pyproject-parser/actions?query=workflow%3A%22macOS%22
    :alt: macOS Test Status

.. |actions_flake8| image:: https://github.com/repo-helper/pyproject-parser/workflows/Flake8/badge.svg
    :target: https://github.com/repo-helper/pyproject-parser/actions?query=workflow%3A%22Flake8%22
    :alt: Flake8 Status

.. |actions_mypy| image:: https://github.com/repo-helper/pyproject-parser/workflows/mypy/badge.svg
    :target: https://github.com/repo-helper/pyproject-parser/actions?query=workflow%3A%22mypy%22
    :alt: mypy status

.. |requires| image:: https://dependency-dash.repo-helper.uk/github/repo-helper/pyproject-parser/badge.svg
    :target: https://dependency-dash.repo-helper.uk/github/repo-helper/pyproject-parser/
    :alt: Requirements Status

.. |coveralls| image:: https://img.shields.io/coveralls/github/repo-helper/pyproject-parser/master?logo=coveralls
    :target: https://coveralls.io/github/repo-helper/pyproject-parser?branch=master
    :alt: Coverage

.. |codefactor| image:: https://img.shields.io/codefactor/grade/github/repo-helper/pyproject-parser?logo=codefactor
    :target: https://www.codefactor.io/repository/github/repo-helper/pyproject-parser
    :alt: CodeFactor Grade

.. |pypi-version| image:: https://img.shields.io/pypi/v/pyproject-parser
    :target: https://pypi.org/project/pyproject-parser/
    :alt: PyPI - Package Version

.. |supported-versions| image:: https://img.shields.io/pypi/pyversions/pyproject-parser?logo=python&logoColor=white
    :target: https://pypi.org/project/pyproject-parser/
    :alt: PyPI - Supported Python Versions

.. |supported-implementations| image:: https://img.shields.io/pypi/implementation/pyproject-parser
    :target: https://pypi.org/project/pyproject-parser/
    :alt: PyPI - Supported Implementations

.. |wheel| image:: https://img.shields.io/pypi/wheel/pyproject-parser
    :target: https://pypi.org/project/pyproject-parser/
    :alt: PyPI - Wheel

.. |conda-version| image:: https://img.shields.io/conda/v/conda-forge/pyproject-parser?logo=anaconda
    :target: https://anaconda.org/conda-forge/pyproject-parser
    :alt: Conda - Package Version

.. |conda-platform| image:: https://img.shields.io/conda/pn/conda-forge/pyproject-parser?label=conda%7Cplatform
    :target: https://anaconda.org/conda-forge/pyproject-parser
    :alt: Conda - Platform

.. |license| image:: https://img.shields.io/github/license/repo-helper/pyproject-parser
    :target: https://github.com/repo-helper/pyproject-parser/blob/master/LICENSE
    :alt: License

.. |language| image:: https://img.shields.io/github/languages/top/repo-helper/pyproject-parser
    :alt: GitHub top language

.. |commits-since| image:: https://img.shields.io/github/commits-since/repo-helper/pyproject-parser/v0.13.0
    :target: https://github.com/repo-helper/pyproject-parser/pulse
    :alt: GitHub commits since tagged version

.. |commits-latest| image:: https://img.shields.io/github/last-commit/repo-helper/pyproject-parser
    :target: https://github.com/repo-helper/pyproject-parser/commit/master
    :alt: GitHub last commit

.. |maintained| image:: https://img.shields.io/maintenance/yes/2025
    :alt: Maintenance

.. |pypi-downloads| image:: https://img.shields.io/pypi/dm/pyproject-parser
    :target: https://pypi.org/project/pyproject-parser/
    :alt: PyPI - Downloads

.. end shields

Installation
|
||||
--------------
|
||||
|
||||
.. start installation
|
||||
|
||||
``pyproject-parser`` can be installed from PyPI or Anaconda.
|
||||
|
||||
To install with ``pip``:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ python -m pip install pyproject-parser
|
||||
|
||||
To install with ``conda``:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ conda install -c conda-forge pyproject-parser
|
||||
|
||||
.. end installation
|
||||
|
||||
``pyproject-parser`` also has an optional README validation feature, which checks that the README will render correctly on PyPI.
|
||||
This requires that the ``readme`` extra is installed:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ python -m pip install pyproject-parser[readme]
|
||||
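For a quick look at the parser itself, the following loads and inspects a ``pyproject.toml`` file. This is a minimal sketch rather than the validation feature described above; the file path and the printed field are placeholders:

.. code-block:: python

	from pyproject_parser import PyProject

	# Parse the file into its build-system, project and tool tables.
	project = PyProject.load("pyproject.toml")

	# The [project] table is exposed as a dict-like mapping.
	print(project.project["name"])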
|
|
@ -1,20 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta name="generator" content="simple503 version 0.4.0" />
|
||||
<meta name="pypi:repository-version" content="1.0" />
|
||||
<meta charset="UTF-8" />
|
||||
<title>
|
||||
Links for requests
|
||||
</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>
|
||||
Links for requests
|
||||
</h1>
|
||||
<a href="/requests/requests-2.32.5-py3-none-any.whl#sha256=2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6" data-requires-python=">=3.9" data-dist-info-metadata="sha256=65b5a08da81f4915513e760965ff14b7518f65b7bb3efe0dab364bbcc4d40cb0">
|
||||
requests-2.32.5-py3-none-any.whl
|
||||
</a>
|
||||
<br />
|
||||
</body>
|
||||
</html>
|
||||
Binary file not shown.
|
|
@ -1,133 +0,0 @@
|
|||
Metadata-Version: 2.4
|
||||
Name: requests
|
||||
Version: 2.32.5
|
||||
Summary: Python HTTP for Humans.
|
||||
Home-page: https://requests.readthedocs.io
|
||||
Author: Kenneth Reitz
|
||||
Author-email: me@kennethreitz.org
|
||||
License: Apache-2.0
|
||||
Project-URL: Documentation, https://requests.readthedocs.io
|
||||
Project-URL: Source, https://github.com/psf/requests
|
||||
Classifier: Development Status :: 5 - Production/Stable
|
||||
Classifier: Environment :: Web Environment
|
||||
Classifier: Intended Audience :: Developers
|
||||
Classifier: License :: OSI Approved :: Apache Software License
|
||||
Classifier: Natural Language :: English
|
||||
Classifier: Operating System :: OS Independent
|
||||
Classifier: Programming Language :: Python
|
||||
Classifier: Programming Language :: Python :: 3
|
||||
Classifier: Programming Language :: Python :: 3.9
|
||||
Classifier: Programming Language :: Python :: 3.10
|
||||
Classifier: Programming Language :: Python :: 3.11
|
||||
Classifier: Programming Language :: Python :: 3.12
|
||||
Classifier: Programming Language :: Python :: 3.13
|
||||
Classifier: Programming Language :: Python :: 3.14
|
||||
Classifier: Programming Language :: Python :: 3 :: Only
|
||||
Classifier: Programming Language :: Python :: Implementation :: CPython
|
||||
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
||||
Classifier: Topic :: Internet :: WWW/HTTP
|
||||
Classifier: Topic :: Software Development :: Libraries
|
||||
Requires-Python: >=3.9
|
||||
Description-Content-Type: text/markdown
|
||||
License-File: LICENSE
|
||||
Requires-Dist: charset_normalizer<4,>=2
|
||||
Requires-Dist: idna<4,>=2.5
|
||||
Requires-Dist: urllib3<3,>=1.21.1
|
||||
Requires-Dist: certifi>=2017.4.17
|
||||
Provides-Extra: security
|
||||
Provides-Extra: socks
|
||||
Requires-Dist: PySocks!=1.5.7,>=1.5.6; extra == "socks"
|
||||
Provides-Extra: use-chardet-on-py3
|
||||
Requires-Dist: chardet<6,>=3.0.2; extra == "use-chardet-on-py3"
|
||||
Dynamic: author
|
||||
Dynamic: author-email
|
||||
Dynamic: classifier
|
||||
Dynamic: description
|
||||
Dynamic: description-content-type
|
||||
Dynamic: home-page
|
||||
Dynamic: license
|
||||
Dynamic: license-file
|
||||
Dynamic: project-url
|
||||
Dynamic: provides-extra
|
||||
Dynamic: requires-dist
|
||||
Dynamic: requires-python
|
||||
Dynamic: summary
|
||||
|
||||
# Requests
|
||||
|
||||
**Requests** is a simple, yet elegant, HTTP library.
|
||||
|
||||
```python
|
||||
>>> import requests
|
||||
>>> r = requests.get('https://httpbin.org/basic-auth/user/pass', auth=('user', 'pass'))
|
||||
>>> r.status_code
|
||||
200
|
||||
>>> r.headers['content-type']
|
||||
'application/json; charset=utf8'
|
||||
>>> r.encoding
|
||||
'utf-8'
|
||||
>>> r.text
|
||||
'{"authenticated": true, ...'
|
||||
>>> r.json()
|
||||
{'authenticated': True, ...}
|
||||
```
|
||||
|
||||
Requests allows you to send HTTP/1.1 requests extremely easily. There’s no need to manually add query strings to your URLs, or to form-encode your `PUT` & `POST` data — but nowadays, just use the `json` method!
|
||||
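For instance, query strings and JSON bodies are both handled via keyword arguments (a minimal sketch; httpbin simply echoes the request back):

```python
import requests

# `params` builds the query string; `json=` serialises the body as JSON.
response = requests.post(
    "https://httpbin.org/post",
    params={"q": "example"},
    json={"key": "value"},
)
print(response.json()["json"])  # the echoed JSON body
```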
|
||||
Requests is one of the most downloaded Python packages today, pulling in around `30M downloads / week` — according to GitHub, Requests is currently [depended upon](https://github.com/psf/requests/network/dependents?package_id=UGFja2FnZS01NzA4OTExNg%3D%3D) by `1,000,000+` repositories. You may certainly put your trust in this code.
|
||||
|
||||
[Downloads](https://pepy.tech/project/requests)
|
||||
[Supported Versions](https://pypi.org/project/requests)
|
||||
[Contributors](https://github.com/psf/requests/graphs/contributors)
|
||||
|
||||
## Installing Requests and Supported Versions
|
||||
|
||||
Requests is available on PyPI:
|
||||
|
||||
```console
|
||||
$ python -m pip install requests
|
||||
```
|
||||
|
||||
Requests officially supports Python 3.9+.
|
||||
|
||||
## Supported Features & Best–Practices
|
||||
|
||||
Requests is ready for the demands of building robust and reliable HTTP–speaking applications, for the needs of today. A short sketch of two of these features follows the list below.
|
||||
|
||||
- Keep-Alive & Connection Pooling
|
||||
- International Domains and URLs
|
||||
- Sessions with Cookie Persistence
|
||||
- Browser-style TLS/SSL Verification
|
||||
- Basic & Digest Authentication
|
||||
- Familiar `dict`–like Cookies
|
||||
- Automatic Content Decompression and Decoding
|
||||
- Multi-part File Uploads
|
||||
- SOCKS Proxy Support
|
||||
- Connection Timeouts
|
||||
- Streaming Downloads
|
||||
- Automatic honoring of `.netrc`
|
||||
- Chunked HTTP Requests
|
||||
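Two of the features above, sessions with cookie persistence and connection timeouts, are shown in this minimal sketch (the URL is just the httpbin demo service):

```python
import requests

# Cookies set within a Session persist across later requests made on it,
# and each call carries an explicit timeout in seconds.
with requests.Session() as session:
    session.get("https://httpbin.org/cookies/set/example/1", timeout=5)
    response = session.get("https://httpbin.org/cookies", timeout=5)
    print(response.json())  # includes the cookie stored by the first call
```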
|
||||
## API Reference and User Guide available on [Read the Docs](https://requests.readthedocs.io)
|
||||
|
||||
[Read the Docs](https://requests.readthedocs.io)
|
||||
|
||||
## Cloning the repository
|
||||
|
||||
When cloning the Requests repository, you may need to add the `-c
|
||||
fetch.fsck.badTimezone=ignore` flag to avoid an error about a bad commit timestamp (see
|
||||
[this issue](https://github.com/psf/requests/issues/2690) for more background):
|
||||
|
||||
```shell
|
||||
git clone -c fetch.fsck.badTimezone=ignore https://github.com/psf/requests.git
|
||||
```
|
||||
|
||||
You can also apply this setting to your global Git config:
|
||||
|
||||
```shell
|
||||
git config --global fetch.fsck.badTimezone ignore
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
[Kenneth Reitz](https://kennethreitz.org) [Python Software Foundation](https://www.python.org/psf)
|
||||
|
|
@ -16,10 +16,6 @@
|
|||
setuptools_scm-9.2.2-py3-none-any.whl
|
||||
</a>
|
||||
<br />
|
||||
<a href="/setuptools-scm/setuptools_scm-8.3.1-py3-none-any.whl#sha256=332ca0d43791b818b841213e76b1971b7711a960761c5bea5fc5cdb5196fbce3" data-requires-python=">=3.8" data-dist-info-metadata="sha256=a4a5a66c7dd5f9113d3ae03392ca0918174fdeac4f1b261e63d259f9756df741">
|
||||
setuptools_scm-8.3.1-py3-none-any.whl
|
||||
</a>
|
||||
<br />
|
||||
<a href="/setuptools-scm/setuptools_scm-7.1.0-py3-none-any.whl#sha256=73988b6d848709e2af142aa48c986ea29592bbcfca5375678064708205253d8e" data-requires-python=">=3.7" data-dist-info-metadata="sha256=b3f2e97c7a459abfbe07dde0547e24e5aac33d4afcfd086cda59fdf855c48d8d">
|
||||
setuptools_scm-7.1.0-py3-none-any.whl
|
||||
</a>
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -1,173 +0,0 @@
|
|||
Metadata-Version: 2.4
|
||||
Name: setuptools-scm
|
||||
Version: 8.3.1
|
||||
Summary: the blessed package to manage your versions by scm tags
|
||||
Author-email: Ronny Pfannschmidt <opensource@ronnypfannschmidt.de>
|
||||
License: Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
|
||||
Project-URL: documentation, https://setuptools-scm.readthedocs.io/
|
||||
Project-URL: repository, https://github.com/pypa/setuptools-scm/
|
||||
Classifier: Development Status :: 5 - Production/Stable
|
||||
Classifier: Intended Audience :: Developers
|
||||
Classifier: License :: OSI Approved :: MIT License
|
||||
Classifier: Programming Language :: Python
|
||||
Classifier: Programming Language :: Python :: 3 :: Only
|
||||
Classifier: Programming Language :: Python :: 3.8
|
||||
Classifier: Programming Language :: Python :: 3.9
|
||||
Classifier: Programming Language :: Python :: 3.10
|
||||
Classifier: Programming Language :: Python :: 3.11
|
||||
Classifier: Programming Language :: Python :: 3.12
|
||||
Classifier: Programming Language :: Python :: 3.13
|
||||
Classifier: Topic :: Software Development :: Libraries
|
||||
Classifier: Topic :: Software Development :: Version Control
|
||||
Classifier: Topic :: System :: Software Distribution
|
||||
Classifier: Topic :: Utilities
|
||||
Requires-Python: >=3.8
|
||||
Description-Content-Type: text/markdown
|
||||
License-File: LICENSE
|
||||
Requires-Dist: packaging>=20
|
||||
Requires-Dist: setuptools
|
||||
Requires-Dist: tomli>=1; python_version < "3.11"
|
||||
Requires-Dist: typing-extensions; python_version < "3.10"
|
||||
Requires-Dist: importlib-metadata>=4.6; python_version < "3.10"
|
||||
Provides-Extra: docs
|
||||
Requires-Dist: entangled-cli~=2.0; extra == "docs"
|
||||
Requires-Dist: mkdocs; extra == "docs"
|
||||
Requires-Dist: mkdocs-entangled-plugin; extra == "docs"
|
||||
Requires-Dist: mkdocs-include-markdown-plugin; extra == "docs"
|
||||
Requires-Dist: mkdocs-material; extra == "docs"
|
||||
Requires-Dist: mkdocstrings[python]; extra == "docs"
|
||||
Requires-Dist: pygments; extra == "docs"
|
||||
Provides-Extra: rich
|
||||
Requires-Dist: rich; extra == "rich"
|
||||
Provides-Extra: test
|
||||
Requires-Dist: build; extra == "test"
|
||||
Requires-Dist: pytest; extra == "test"
|
||||
Requires-Dist: rich; extra == "test"
|
||||
Requires-Dist: typing-extensions; python_version < "3.11" and extra == "test"
|
||||
Requires-Dist: wheel; extra == "test"
|
||||
Provides-Extra: toml
|
||||
Dynamic: license-file
|
||||
|
||||
# setuptools-scm
|
||||
[tests](https://github.com/pypa/setuptools-scm/actions/workflows/python-tests.yml)
|
||||
[Documentation Status](https://setuptools-scm.readthedocs.io/en/latest/?badge=latest)
|
||||
[Tidelift](https://tidelift.com/subscription/pkg/pypi-setuptools-scm?utm_source=pypi-setuptools-scm&utm_medium=readme)
|
||||
|
||||
## about
|
||||
|
||||
[setuptools-scm] extracts Python package versions from `git` or `hg` metadata
|
||||
instead of declaring them as the version argument
|
||||
or in a Source Code Management (SCM) managed file.
|
||||
|
||||
Additionally [setuptools-scm] provides `setuptools` with a list of
|
||||
files that are managed by the SCM
|
||||
<br/>
|
||||
(i.e. it automatically adds all the SCM-managed files to the sdist).
|
||||
<br/>
|
||||
Unwanted files must be excluded via `MANIFEST.in`
|
||||
or [configuring Git archive][git-archive-docs].
|
||||
|
||||
## `pyproject.toml` usage
|
||||
|
||||
The preferred way to configure [setuptools-scm] is to author
|
||||
settings in a `tool.setuptools_scm` section of `pyproject.toml`.
|
||||
|
||||
This feature requires setuptools 61 or later.
|
||||
First, ensure that [setuptools-scm] is present during the project's
|
||||
build step by specifying it as one of the build requirements.
|
||||
|
||||
```toml title="pyproject.toml"
|
||||
[build-system]
|
||||
requires = ["setuptools>=64", "setuptools-scm>=8"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
```
|
||||
|
||||
That will be sufficient to require [setuptools-scm] for projects
|
||||
that support [PEP 518] like [pip] and [build].
|
||||
|
||||
[pip]: https://pypi.org/project/pip
|
||||
[build]: https://pypi.org/project/build
|
||||
[PEP 518]: https://peps.python.org/pep-0518/
|
||||
|
||||
|
||||
To enable version inference, you need to set the version
|
||||
dynamically in the `project` section of `pyproject.toml`:
|
||||
|
||||
```toml title="pyproject.toml"
|
||||
[project]
|
||||
# version = "0.0.1" # Remove any existing version parameter.
|
||||
dynamic = ["version"]
|
||||
|
||||
[tool.setuptools_scm]
|
||||
```
|
||||
|
||||
Additionally, a version file can be written by specifying:
|
||||
|
||||
```toml title="pyproject.toml"
|
||||
[tool.setuptools_scm]
|
||||
version_file = "pkg/_version.py"
|
||||
```
|
||||
|
||||
Where `pkg` is the name of your package.
|
||||
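The written file can then be imported at runtime to recover the version. This is a minimal sketch, assuming the `version_file` setting above and a package actually named `pkg`; the generated module defines a `version` attribute:

```python
# pkg/_version.py is written by setuptools-scm at build time (see above).
try:
    from pkg._version import version
except ImportError:
    # Fall back to installed-distribution metadata if the file is absent.
    from importlib.metadata import version as dist_version
    version = dist_version("pkg")

print(version)
```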
|
||||
If you need to confirm which version string is being generated or debug the configuration,
|
||||
you can install [setuptools-scm] directly in your working environment and run:
|
||||
|
||||
```console
|
||||
$ python -m setuptools_scm
|
||||
# To explore other options, try:
|
||||
$ python -m setuptools_scm --help
|
||||
```
|
||||
|
||||
For further configuration see the [documentation].
|
||||
|
||||
[setuptools-scm]: https://github.com/pypa/setuptools-scm
|
||||
[documentation]: https://setuptools-scm.readthedocs.io/
|
||||
[git-archive-docs]: https://setuptools-scm.readthedocs.io/en/stable/usage/#builtin-mechanisms-for-obtaining-version-numbers
|
||||
|
||||
|
||||
## Interaction with Enterprise Distributions
|
||||
|
||||
Some enterprise distributions like RHEL7
|
||||
ship rather old setuptools versions.
|
||||
|
||||
In those cases it's typically possible to build by using an sdist against `setuptools-scm<2.0`.
|
||||
As those old setuptools versions lack sensible types for versions,
|
||||
modern [setuptools-scm] is unable to support them sensibly.
|
||||
|
||||
It's strongly recommended to build a wheel artifact using modern Python and setuptools,
|
||||
then install the artifact instead of trying to run against old setuptools versions.
|
||||
|
||||
|
||||
## Code of Conduct
|
||||
|
||||
|
||||
Everyone interacting in the [setuptools-scm] project's codebases, issue
|
||||
trackers, chat rooms, and mailing lists is expected to follow the
|
||||
[PSF Code of Conduct].
|
||||
|
||||
[PSF Code of Conduct]: https://github.com/pypa/.github/blob/main/CODE_OF_CONDUCT.md
|
||||
|
||||
|
||||
## Security Contact
|
||||
|
||||
To report a security vulnerability, please use the
|
||||
[Tidelift security contact](https://tidelift.com/security).
|
||||
Tidelift will coordinate the fix and disclosure.
|
||||
|
|
@ -16,9 +16,5 @@
|
|||
setuptools-80.9.0-py3-none-any.whl
|
||||
</a>
|
||||
<br />
|
||||
<a href="/setuptools/setuptools-67.1.0-py3-none-any.whl#sha256=a7687c12b444eaac951ea87a9627c4f904ac757e7abdc5aac32833234af90378" data-requires-python=">=3.7" data-dist-info-metadata="sha256=286148a5442173faf79ca5aec07d58e4032d88a448099256ea59e50e5c20181b">
|
||||
setuptools-67.1.0-py3-none-any.whl
|
||||
</a>
|
||||
<br />
|
||||
</body>
|
||||
</html>
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -1,137 +0,0 @@
|
|||
Metadata-Version: 2.1
|
||||
Name: setuptools
|
||||
Version: 67.1.0
|
||||
Summary: Easily download, build, install, upgrade, and uninstall Python packages
|
||||
Home-page: https://github.com/pypa/setuptools
|
||||
Author: Python Packaging Authority
|
||||
Author-email: distutils-sig@python.org
|
||||
Project-URL: Documentation, https://setuptools.pypa.io/
|
||||
Project-URL: Changelog, https://setuptools.pypa.io/en/stable/history.html
|
||||
Keywords: CPAN PyPI distutils eggs package management
|
||||
Classifier: Development Status :: 5 - Production/Stable
|
||||
Classifier: Intended Audience :: Developers
|
||||
Classifier: License :: OSI Approved :: MIT License
|
||||
Classifier: Programming Language :: Python :: 3
|
||||
Classifier: Programming Language :: Python :: 3 :: Only
|
||||
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
||||
Classifier: Topic :: System :: Archiving :: Packaging
|
||||
Classifier: Topic :: System :: Systems Administration
|
||||
Classifier: Topic :: Utilities
|
||||
Requires-Python: >=3.7
|
||||
License-File: LICENSE
|
||||
Provides-Extra: certs
|
||||
Provides-Extra: docs
|
||||
Requires-Dist: sphinx (>=3.5) ; extra == 'docs'
|
||||
Requires-Dist: jaraco.packaging (>=9) ; extra == 'docs'
|
||||
Requires-Dist: rst.linker (>=1.9) ; extra == 'docs'
|
||||
Requires-Dist: furo ; extra == 'docs'
|
||||
Requires-Dist: sphinx-lint ; extra == 'docs'
|
||||
Requires-Dist: jaraco.tidelift (>=1.4) ; extra == 'docs'
|
||||
Requires-Dist: pygments-github-lexers (==0.0.5) ; extra == 'docs'
|
||||
Requires-Dist: sphinx-favicon ; extra == 'docs'
|
||||
Requires-Dist: sphinx-inline-tabs ; extra == 'docs'
|
||||
Requires-Dist: sphinx-reredirects ; extra == 'docs'
|
||||
Requires-Dist: sphinxcontrib-towncrier ; extra == 'docs'
|
||||
Requires-Dist: sphinx-notfound-page (==0.8.3) ; extra == 'docs'
|
||||
Requires-Dist: sphinx-hoverxref (<2) ; extra == 'docs'
|
||||
Provides-Extra: ssl
|
||||
Provides-Extra: testing
|
||||
Requires-Dist: pytest (>=6) ; extra == 'testing'
|
||||
Requires-Dist: pytest-checkdocs (>=2.4) ; extra == 'testing'
|
||||
Requires-Dist: flake8 (<5) ; extra == 'testing'
|
||||
Requires-Dist: pytest-enabler (>=1.3) ; extra == 'testing'
|
||||
Requires-Dist: pytest-perf ; extra == 'testing'
|
||||
Requires-Dist: flake8-2020 ; extra == 'testing'
|
||||
Requires-Dist: virtualenv (>=13.0.0) ; extra == 'testing'
|
||||
Requires-Dist: wheel ; extra == 'testing'
|
||||
Requires-Dist: pip (>=19.1) ; extra == 'testing'
|
||||
Requires-Dist: jaraco.envs (>=2.2) ; extra == 'testing'
|
||||
Requires-Dist: pytest-xdist ; extra == 'testing'
|
||||
Requires-Dist: jaraco.path (>=3.2.0) ; extra == 'testing'
|
||||
Requires-Dist: build[virtualenv] ; extra == 'testing'
|
||||
Requires-Dist: filelock (>=3.4.0) ; extra == 'testing'
|
||||
Requires-Dist: pip-run (>=8.8) ; extra == 'testing'
|
||||
Requires-Dist: ini2toml[lite] (>=0.9) ; extra == 'testing'
|
||||
Requires-Dist: tomli-w (>=1.0.0) ; extra == 'testing'
|
||||
Requires-Dist: pytest-timeout ; extra == 'testing'
|
||||
Provides-Extra: testing-integration
|
||||
Requires-Dist: pytest ; extra == 'testing-integration'
|
||||
Requires-Dist: pytest-xdist ; extra == 'testing-integration'
|
||||
Requires-Dist: pytest-enabler ; extra == 'testing-integration'
|
||||
Requires-Dist: virtualenv (>=13.0.0) ; extra == 'testing-integration'
|
||||
Requires-Dist: tomli ; extra == 'testing-integration'
|
||||
Requires-Dist: wheel ; extra == 'testing-integration'
|
||||
Requires-Dist: jaraco.path (>=3.2.0) ; extra == 'testing-integration'
|
||||
Requires-Dist: jaraco.envs (>=2.2) ; extra == 'testing-integration'
|
||||
Requires-Dist: build[virtualenv] ; extra == 'testing-integration'
|
||||
Requires-Dist: filelock (>=3.4.0) ; extra == 'testing-integration'
|
||||
Requires-Dist: pytest-black (>=0.3.7) ; (platform_python_implementation != "PyPy") and extra == 'testing'
|
||||
Requires-Dist: pytest-cov ; (platform_python_implementation != "PyPy") and extra == 'testing'
|
||||
Requires-Dist: pytest-mypy (>=0.9.1) ; (platform_python_implementation != "PyPy") and extra == 'testing'
|
||||
Requires-Dist: pytest-flake8 ; (python_version < "3.12") and extra == 'testing'
|
||||
|
||||
.. image:: https://img.shields.io/pypi/v/setuptools.svg
|
||||
:target: https://pypi.org/project/setuptools
|
||||
|
||||
.. image:: https://img.shields.io/pypi/pyversions/setuptools.svg
|
||||
|
||||
.. image:: https://github.com/pypa/setuptools/workflows/tests/badge.svg
|
||||
:target: https://github.com/pypa/setuptools/actions?query=workflow%3A%22tests%22
|
||||
:alt: tests
|
||||
|
||||
.. image:: https://img.shields.io/badge/code%20style-black-000000.svg
|
||||
:target: https://github.com/psf/black
|
||||
:alt: Code style: Black
|
||||
|
||||
.. image:: https://img.shields.io/readthedocs/setuptools/latest.svg
|
||||
:target: https://setuptools.pypa.io
|
||||
|
||||
.. image:: https://img.shields.io/badge/skeleton-2023-informational
|
||||
:target: https://blog.jaraco.com/skeleton
|
||||
|
||||
.. image:: https://img.shields.io/codecov/c/github/pypa/setuptools/master.svg?logo=codecov&logoColor=white
|
||||
:target: https://codecov.io/gh/pypa/setuptools
|
||||
|
||||
.. image:: https://tidelift.com/badges/github/pypa/setuptools?style=flat
|
||||
:target: https://tidelift.com/subscription/pkg/pypi-setuptools?utm_source=pypi-setuptools&utm_medium=readme
|
||||
|
||||
.. image:: https://img.shields.io/discord/803025117553754132
|
||||
:target: https://discord.com/channels/803025117553754132/815945031150993468
|
||||
:alt: Discord
|
||||
|
||||
See the `Installation Instructions
|
||||
<https://packaging.python.org/installing/>`_ in the Python Packaging
|
||||
User's Guide for instructions on installing, upgrading, and uninstalling
|
||||
Setuptools.
|
||||
|
||||
Questions and comments should be directed to `GitHub Discussions
|
||||
<https://github.com/pypa/setuptools/discussions>`_.
|
||||
Bug reports and especially tested patches may be
|
||||
submitted directly to the `bug tracker
|
||||
<https://github.com/pypa/setuptools/issues>`_.
|
||||
|
||||
|
||||
Code of Conduct
|
||||
===============
|
||||
|
||||
Everyone interacting in the setuptools project's codebases, issue trackers,
|
||||
chat rooms, and fora is expected to follow the
|
||||
`PSF Code of Conduct <https://github.com/pypa/.github/blob/main/CODE_OF_CONDUCT.md>`_.
|
||||
|
||||
|
||||
For Enterprise
|
||||
==============
|
||||
|
||||
Available as part of the Tidelift Subscription.
|
||||
|
||||
Setuptools and the maintainers of thousands of other packages are working with Tidelift to deliver one enterprise subscription that covers all of the open source you use.
|
||||
|
||||
`Learn more <https://tidelift.com/subscription/pkg/pypi-setuptools?utm_source=pypi-setuptools&utm_medium=referral&utm_campaign=github>`_.
|
||||
|
||||
|
||||
Security Contact
|
||||
================
|
||||
|
||||
To report a security vulnerability, please use the
|
||||
`Tidelift security contact <https://tidelift.com/security>`_.
|
||||
Tidelift will coordinate the fix and disclosure.
|
||||
|
|
@ -1,20 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta name="generator" content="simple503 version 0.4.0" />
|
||||
<meta name="pypi:repository-version" content="1.0" />
|
||||
<meta charset="UTF-8" />
|
||||
<title>
|
||||
Links for shippinglabel
|
||||
</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>
|
||||
Links for shippinglabel
|
||||
</h1>
|
||||
<a href="/shippinglabel/shippinglabel-2.3.0-py3-none-any.whl#sha256=37811ae077f4a40e524efa013a136e3e2d295a96ad149a4e84b7788dd7f3b64a" data-requires-python=">=3.7" data-dist-info-metadata="sha256=7bcc0acc49d2dbb7bbaf848bd89a455ad101692f6f5ddf45f1a22c767b03ff61">
|
||||
shippinglabel-2.3.0-py3-none-any.whl
|
||||
</a>
|
||||
<br />
|
||||
</body>
|
||||
</html>
|
||||
Binary file not shown.
|
|
@ -1,198 +0,0 @@
|
|||
Metadata-Version: 2.4
|
||||
Name: shippinglabel
|
||||
Version: 2.3.0
|
||||
Summary: Utilities for handling packages.
|
||||
Project-URL: Homepage, https://github.com/domdfcoding/shippinglabel
|
||||
Project-URL: Issue Tracker, https://github.com/domdfcoding/shippinglabel/issues
|
||||
Project-URL: Source Code, https://github.com/domdfcoding/shippinglabel
|
||||
Project-URL: Documentation, https://shippinglabel.readthedocs.io/en/latest
|
||||
Author-email: Dominic Davis-Foster <dominic@davis-foster.co.uk>
|
||||
License: Copyright (c) 2020-2022 Dominic Davis-Foster
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||
OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
License-File: LICENSE
|
||||
Keywords: conda,packaging,pypi,requirements
|
||||
Classifier: Development Status :: 5 - Production/Stable
|
||||
Classifier: Intended Audience :: Developers
|
||||
Classifier: License :: OSI Approved :: MIT License
|
||||
Classifier: Operating System :: OS Independent
|
||||
Classifier: Programming Language :: Python
|
||||
Classifier: Programming Language :: Python :: 3 :: Only
|
||||
Classifier: Programming Language :: Python :: 3.7
|
||||
Classifier: Programming Language :: Python :: 3.8
|
||||
Classifier: Programming Language :: Python :: 3.9
|
||||
Classifier: Programming Language :: Python :: 3.10
|
||||
Classifier: Programming Language :: Python :: 3.11
|
||||
Classifier: Programming Language :: Python :: 3.12
|
||||
Classifier: Programming Language :: Python :: 3.13
|
||||
Classifier: Programming Language :: Python :: Implementation :: CPython
|
||||
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
||||
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
||||
Classifier: Topic :: System :: Archiving :: Packaging
|
||||
Classifier: Typing :: Typed
|
||||
Requires-Python: >=3.7
|
||||
Requires-Dist: dist-meta>=0.1.2
|
||||
Requires-Dist: dom-toml>=0.2.2
|
||||
Requires-Dist: domdf-python-tools>=3.1.0
|
||||
Requires-Dist: packaging>=20.9
|
||||
Requires-Dist: typing-extensions>=3.7.4.3
|
||||
Description-Content-Type: text/x-rst
|
||||
|
||||
#############
|
||||
shippinglabel
|
||||
#############
|
||||
|
||||
.. start short_desc
|
||||
|
||||
**Utilities for handling packages.**
|
||||
|
||||
.. end short_desc
|
||||
|
||||
.. start shields
|
||||
|
||||
.. list-table::
|
||||
:stub-columns: 1
|
||||
:widths: 10 90
|
||||
|
||||
* - Docs
|
||||
- |docs| |docs_check|
|
||||
* - Tests
|
||||
- |actions_linux| |actions_windows| |actions_macos| |coveralls|
|
||||
* - PyPI
|
||||
- |pypi-version| |supported-versions| |supported-implementations| |wheel|
|
||||
* - Anaconda
|
||||
- |conda-version| |conda-platform|
|
||||
* - Activity
|
||||
- |commits-latest| |commits-since| |maintained| |pypi-downloads|
|
||||
* - QA
|
||||
- |codefactor| |actions_flake8| |actions_mypy|
|
||||
* - Other
|
||||
- |license| |language| |requires|
|
||||
|
||||
.. |docs| image:: https://img.shields.io/readthedocs/shippinglabel/latest?logo=read-the-docs
|
||||
:target: https://shippinglabel.readthedocs.io/en/latest
|
||||
:alt: Documentation Build Status
|
||||
|
||||
.. |docs_check| image:: https://github.com/domdfcoding/shippinglabel/workflows/Docs%20Check/badge.svg
|
||||
:target: https://github.com/domdfcoding/shippinglabel/actions?query=workflow%3A%22Docs+Check%22
|
||||
:alt: Docs Check Status
|
||||
|
||||
.. |actions_linux| image:: https://github.com/domdfcoding/shippinglabel/workflows/Linux/badge.svg
|
||||
:target: https://github.com/domdfcoding/shippinglabel/actions?query=workflow%3A%22Linux%22
|
||||
:alt: Linux Test Status
|
||||
|
||||
.. |actions_windows| image:: https://github.com/domdfcoding/shippinglabel/workflows/Windows/badge.svg
|
||||
:target: https://github.com/domdfcoding/shippinglabel/actions?query=workflow%3A%22Windows%22
|
||||
:alt: Windows Test Status
|
||||
|
||||
.. |actions_macos| image:: https://github.com/domdfcoding/shippinglabel/workflows/macOS/badge.svg
|
||||
:target: https://github.com/domdfcoding/shippinglabel/actions?query=workflow%3A%22macOS%22
|
||||
:alt: macOS Test Status
|
||||
|
||||
.. |actions_flake8| image:: https://github.com/domdfcoding/shippinglabel/workflows/Flake8/badge.svg
|
||||
:target: https://github.com/domdfcoding/shippinglabel/actions?query=workflow%3A%22Flake8%22
|
||||
:alt: Flake8 Status
|
||||
|
||||
.. |actions_mypy| image:: https://github.com/domdfcoding/shippinglabel/workflows/mypy/badge.svg
|
||||
:target: https://github.com/domdfcoding/shippinglabel/actions?query=workflow%3A%22mypy%22
|
||||
:alt: mypy status
|
||||
|
||||
.. |requires| image:: https://dependency-dash.repo-helper.uk/github/domdfcoding/shippinglabel/badge.svg
|
||||
:target: https://dependency-dash.repo-helper.uk/github/domdfcoding/shippinglabel/
|
||||
:alt: Requirements Status
|
||||
|
||||
.. |coveralls| image:: https://img.shields.io/coveralls/github/domdfcoding/shippinglabel/master?logo=coveralls
|
||||
:target: https://coveralls.io/github/domdfcoding/shippinglabel?branch=master
|
||||
:alt: Coverage
|
||||
|
||||
.. |codefactor| image:: https://img.shields.io/codefactor/grade/github/domdfcoding/shippinglabel?logo=codefactor
|
||||
:target: https://www.codefactor.io/repository/github/domdfcoding/shippinglabel
|
||||
:alt: CodeFactor Grade
|
||||
|
||||
.. |pypi-version| image:: https://img.shields.io/pypi/v/shippinglabel
|
||||
:target: https://pypi.org/project/shippinglabel/
|
||||
:alt: PyPI - Package Version
|
||||
|
||||
.. |supported-versions| image:: https://img.shields.io/pypi/pyversions/shippinglabel?logo=python&logoColor=white
|
||||
:target: https://pypi.org/project/shippinglabel/
|
||||
:alt: PyPI - Supported Python Versions
|
||||
|
||||
.. |supported-implementations| image:: https://img.shields.io/pypi/implementation/shippinglabel
|
||||
:target: https://pypi.org/project/shippinglabel/
|
||||
:alt: PyPI - Supported Implementations
|
||||
|
||||
.. |wheel| image:: https://img.shields.io/pypi/wheel/shippinglabel
|
||||
:target: https://pypi.org/project/shippinglabel/
|
||||
:alt: PyPI - Wheel
|
||||
|
||||
.. |conda-version| image:: https://img.shields.io/conda/v/conda-forge/shippinglabel?logo=anaconda
|
||||
:target: https://anaconda.org/conda-forge/shippinglabel
|
||||
:alt: Conda - Package Version
|
||||
|
||||
.. |conda-platform| image:: https://img.shields.io/conda/pn/conda-forge/shippinglabel?label=conda%7Cplatform
|
||||
:target: https://anaconda.org/conda-forge/shippinglabel
|
||||
:alt: Conda - Platform
|
||||
|
||||
.. |license| image:: https://img.shields.io/github/license/domdfcoding/shippinglabel
|
||||
:target: https://github.com/domdfcoding/shippinglabel/blob/master/LICENSE
|
||||
:alt: License
|
||||
|
||||
.. |language| image:: https://img.shields.io/github/languages/top/domdfcoding/shippinglabel
|
||||
:alt: GitHub top language
|
||||
|
||||
.. |commits-since| image:: https://img.shields.io/github/commits-since/domdfcoding/shippinglabel/v2.3.0
|
||||
:target: https://github.com/domdfcoding/shippinglabel/pulse
|
||||
:alt: GitHub commits since tagged version
|
||||
|
||||
.. |commits-latest| image:: https://img.shields.io/github/last-commit/domdfcoding/shippinglabel
|
||||
:target: https://github.com/domdfcoding/shippinglabel/commit/master
|
||||
:alt: GitHub last commit
|
||||
|
||||
.. |maintained| image:: https://img.shields.io/maintenance/yes/2025
|
||||
:alt: Maintenance
|
||||
|
||||
.. |pypi-downloads| image:: https://img.shields.io/pypi/dm/shippinglabel
|
||||
:target: https://pypi.org/project/shippinglabel/
|
||||
:alt: PyPI - Downloads
|
||||
|
||||
.. end shields
|
||||
|
||||
Installation
|
||||
--------------
|
||||
|
||||
.. start installation
|
||||
|
||||
``shippinglabel`` can be installed from PyPI or Anaconda.
|
||||
|
||||
To install with ``pip``:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ python -m pip install shippinglabel
|
||||
|
||||
To install with ``conda``:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ conda install -c conda-forge shippinglabel
|
||||
|
||||
.. end installation
|
||||
|
||||
``shippinglabel`` includes a vendored copy of `trove-classifiers <https://pypi.org/project/trove-classifiers/>`_.
|
||||
If you install a newer version of ``trove-classifiers`` with pip, ``shippinglabel`` will use that version instead.
|
||||
|
|
@ -1,20 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta name="generator" content="simple503 version 0.4.0" />
|
||||
<meta name="pypi:repository-version" content="1.0" />
|
||||
<meta charset="UTF-8" />
|
||||
<title>
|
||||
Links for simple503
|
||||
</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>
|
||||
Links for simple503
|
||||
</h1>
|
||||
<a href="/simple503/simple503-0.4.0-py3-none-any.whl#sha256=01046e8e15392d7239a9df97128939caecd9d7699b9d1dd4517e37a058b67c8f" data-requires-python=">=3.6" data-dist-info-metadata="sha256=e53037c70d835cb1643b352dd5ddf9026dde51eed4e0fc5065c507617473e91a">
|
||||
simple503-0.4.0-py3-none-any.whl
|
||||
</a>
|
||||
<br />
|
||||
</body>
|
||||
</html>
|
||||
Binary file not shown.
|
|
@ -1,183 +0,0 @@
|
|||
Metadata-Version: 2.1
|
||||
Name: simple503
|
||||
Version: 0.4.0
|
||||
Summary: PEP 503 Python package repository generator.
|
||||
Author-email: Dominic Davis-Foster <dominic@davis-foster.co.uk>
|
||||
License: MIT
|
||||
Keywords: pep503,pep658,pip,pypi
|
||||
Home-page: https://github.com/repo-helper/simple503
|
||||
Project-URL: Issue Tracker, https://github.com/repo-helper/simple503/issues
|
||||
Project-URL: Source Code, https://github.com/repo-helper/simple503
|
||||
Project-URL: Documentation, https://simple503.readthedocs.io/en/latest
|
||||
Platform: Windows
|
||||
Platform: macOS
|
||||
Platform: Linux
|
||||
Classifier: Development Status :: 3 - Alpha
|
||||
Classifier: Environment :: Console
|
||||
Classifier: Intended Audience :: Developers
|
||||
Classifier: License :: OSI Approved :: MIT License
|
||||
Classifier: Operating System :: OS Independent
|
||||
Classifier: Programming Language :: Python
|
||||
Classifier: Programming Language :: Python :: 3 :: Only
|
||||
Classifier: Programming Language :: Python :: 3.6
|
||||
Classifier: Programming Language :: Python :: 3.7
|
||||
Classifier: Programming Language :: Python :: 3.8
|
||||
Classifier: Programming Language :: Python :: 3.9
|
||||
Classifier: Programming Language :: Python :: 3.10
|
||||
Classifier: Programming Language :: Python :: 3.11
|
||||
Classifier: Programming Language :: Python :: Implementation :: CPython
|
||||
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
||||
Classifier: Topic :: System :: Archiving :: Packaging
|
||||
Classifier: Typing :: Typed
|
||||
Requires-Python: >=3.6
|
||||
Requires-Dist: airium>=0.2.2
|
||||
Requires-Dist: apeye>=1.0.1
|
||||
Requires-Dist: click>=8.0.1
|
||||
Requires-Dist: consolekit>=1.2.2
|
||||
Requires-Dist: dataclasses>=0.8; python_version == "3.6"
|
||||
Requires-Dist: dist-meta>=0.1.0
|
||||
Requires-Dist: dom-toml>=0.5.1
|
||||
Requires-Dist: domdf-python-tools>=2.9.1
|
||||
Requires-Dist: natsort>=7.1.1
|
||||
Requires-Dist: shippinglabel>=0.15.0
|
||||
Requires-Dist: typing-extensions>=3.7.4.1
|
||||
Requires-Dist: beautifulsoup4>=4.9.3; extra == 'incremental'
|
||||
Requires-Dist: beautifulsoup4>=4.9.3; extra == 'all'
|
||||
Provides-Extra: incremental
|
||||
Provides-Extra: all
|
||||
Description-Content-Type: text/x-rst
|
||||
|
||||
|
||||
==========
|
||||
simple503
|
||||
==========
|
||||
|
||||
.. start short_desc
|
||||
|
||||
**PEP 503 Python package repository generator.**
|
||||
|
||||
.. end short_desc
|
||||
|
||||
|
||||
``simple503`` generates a static, `PEP 503`_ simple repository of Python distributions.
|
||||
It takes a directory of Python `wheels`_ and generates the necessary directories and ``index.html`` files.
|
||||
The source directory can optionally be pre-sorted by project name, or ``simple503`` can do this for you.
|
||||
|
||||
An example repository can be seen at https://repo-helper.uk/simple503/
|
||||
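Once generated and served over HTTP, the repository can be consumed with pip's standard index options. A sketch, using the example repository above; the package name is a placeholder:

.. code-block:: bash

	$ python -m pip install --index-url https://repo-helper.uk/simple503/ SOME-PACKAGE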
|
||||
.. _PEP 503: https://www.python.org/dev/peps/pep-0503/
|
||||
.. _wheels: https://www.python.org/dev/peps/pep-0427/
|
||||
|
||||
.. start shields
|
||||
|
||||
.. list-table::
|
||||
:stub-columns: 1
|
||||
:widths: 10 90
|
||||
|
||||
* - Docs
|
||||
- |docs| |docs_check|
|
||||
* - Tests
|
||||
- |actions_linux| |actions_windows| |actions_macos| |coveralls|
|
||||
* - PyPI
|
||||
- |pypi-version| |supported-versions| |supported-implementations| |wheel|
|
||||
* - Activity
|
||||
- |commits-latest| |commits-since| |maintained| |pypi-downloads|
|
||||
* - QA
|
||||
- |codefactor| |actions_flake8| |actions_mypy|
|
||||
* - Other
|
||||
- |license| |language| |requires|
|
||||
|
||||
.. |docs| image:: https://img.shields.io/readthedocs/simple503/latest?logo=read-the-docs
|
||||
:target: https://simple503.readthedocs.io/en/latest
|
||||
:alt: Documentation Build Status
|
||||
|
||||
.. |docs_check| image:: https://github.com/repo-helper/simple503/workflows/Docs%20Check/badge.svg
|
||||
:target: https://github.com/repo-helper/simple503/actions?query=workflow%3A%22Docs+Check%22
|
||||
:alt: Docs Check Status
|
||||
|
||||
.. |actions_linux| image:: https://github.com/repo-helper/simple503/workflows/Linux/badge.svg
|
||||
:target: https://github.com/repo-helper/simple503/actions?query=workflow%3A%22Linux%22
|
||||
:alt: Linux Test Status
|
||||
|
||||
.. |actions_windows| image:: https://github.com/repo-helper/simple503/workflows/Windows/badge.svg
|
||||
:target: https://github.com/repo-helper/simple503/actions?query=workflow%3A%22Windows%22
|
||||
:alt: Windows Test Status
|
||||
|
||||
.. |actions_macos| image:: https://github.com/repo-helper/simple503/workflows/macOS/badge.svg
|
||||
:target: https://github.com/repo-helper/simple503/actions?query=workflow%3A%22macOS%22
|
||||
:alt: macOS Test Status
|
||||
|
||||
.. |actions_flake8| image:: https://github.com/repo-helper/simple503/workflows/Flake8/badge.svg
|
||||
:target: https://github.com/repo-helper/simple503/actions?query=workflow%3A%22Flake8%22
|
||||
:alt: Flake8 Status
|
||||
|
||||
.. |actions_mypy| image:: https://github.com/repo-helper/simple503/workflows/mypy/badge.svg
|
||||
:target: https://github.com/repo-helper/simple503/actions?query=workflow%3A%22mypy%22
|
||||
:alt: mypy status
|
||||
|
||||
.. |requires| image:: https://dependency-dash.repo-helper.uk/github/repo-helper/simple503/badge.svg
|
||||
:target: https://dependency-dash.repo-helper.uk/github/repo-helper/simple503/
|
||||
:alt: Requirements Status
|
||||
|
||||
.. |coveralls| image:: https://img.shields.io/coveralls/github/repo-helper/simple503/master?logo=coveralls
|
||||
:target: https://coveralls.io/github/repo-helper/simple503?branch=master
|
||||
:alt: Coverage
|
||||
|
||||
.. |codefactor| image:: https://img.shields.io/codefactor/grade/github/repo-helper/simple503?logo=codefactor
|
||||
:target: https://www.codefactor.io/repository/github/repo-helper/simple503
|
||||
:alt: CodeFactor Grade
|
||||
|
||||
.. |pypi-version| image:: https://img.shields.io/pypi/v/simple503
|
||||
:target: https://pypi.org/project/simple503/
|
||||
:alt: PyPI - Package Version
|
||||
|
||||
.. |supported-versions| image:: https://img.shields.io/pypi/pyversions/simple503?logo=python&logoColor=white
|
||||
:target: https://pypi.org/project/simple503/
|
||||
:alt: PyPI - Supported Python Versions
|
||||
|
||||
.. |supported-implementations| image:: https://img.shields.io/pypi/implementation/simple503
|
||||
:target: https://pypi.org/project/simple503/
|
||||
:alt: PyPI - Supported Implementations
|
||||
|
||||
.. |wheel| image:: https://img.shields.io/pypi/wheel/simple503
|
||||
:target: https://pypi.org/project/simple503/
|
||||
:alt: PyPI - Wheel
|
||||
|
||||
.. |license| image:: https://img.shields.io/github/license/repo-helper/simple503
|
||||
:target: https://github.com/repo-helper/simple503/blob/master/LICENSE
|
||||
:alt: License
|
||||
|
||||
.. |language| image:: https://img.shields.io/github/languages/top/repo-helper/simple503
|
||||
:alt: GitHub top language
|
||||
|
||||
.. |commits-since| image:: https://img.shields.io/github/commits-since/repo-helper/simple503/v0.4.0
|
||||
:target: https://github.com/repo-helper/simple503/pulse
|
||||
:alt: GitHub commits since tagged version
|
||||
|
||||
.. |commits-latest| image:: https://img.shields.io/github/last-commit/repo-helper/simple503
|
||||
:target: https://github.com/repo-helper/simple503/commit/master
|
||||
:alt: GitHub last commit
|
||||
|
||||
.. |maintained| image:: https://img.shields.io/maintenance/yes/2023
|
||||
:alt: Maintenance
|
||||
|
||||
.. |pypi-downloads| image:: https://img.shields.io/pypi/dm/simple503
|
||||
:target: https://pypi.org/project/simple503/
|
||||
:alt: PyPI - Downloads
|
||||
|
||||
.. end shields
|
||||
|
||||
Installation
|
||||
--------------
|
||||
|
||||
.. start installation
|
||||
|
||||
``simple503`` can be installed from PyPI.
|
||||
|
||||
To install with ``pip``:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ python -m pip install simple503
|
||||
|
||||
.. end installation
|
||||