Examples#
This page provides comprehensive examples of using the Persian library in real-world scenarios.
Web Development#
Django Views#
Clean Persian input in Django views:
from django.http import JsonResponse
from django.views import View
import persian
class ArticleCreateView(View):
    """Create an article from POSTed form data after normalizing its text."""

    def post(self, request):
        """Normalize ``title`` and ``content``, validate the title, then save.

        Returns a 400 ``JsonResponse`` carrying an ``error`` key when
        ``persian.is_persian_text`` rejects the normalized title; otherwise
        creates the article and returns its ``id`` and ``title`` as JSON.
        """
        form = request.POST

        # Missing fields fall back to an empty string before normalization.
        title = persian.normalize_persian(form.get('title', ''))
        content = persian.normalize_persian(form.get('content', ''))

        # Guard clause: only the title is validated, the content is not.
        if not persian.is_persian_text(title):
            error_payload = {'error': 'Title must contain Persian text'}
            return JsonResponse(error_payload, status=400)

        # NOTE(review): the Article model is not imported in this snippet;
        # it presumably comes from the application's models module.
        article = Article.objects.create(title=title, content=content)
        return JsonResponse({'id': article.id, 'title': article.title})
Flask Routes#
Process Persian data in Flask:
from flask import Flask, request, jsonify
import persian
app = Flask(__name__)


@app.route('/api/search', methods=['POST'])
def search():
    """Run a search with the normalized ``query`` taken from the JSON body.

    Expects a JSON object such as ``{"query": "..."}``. Responds with 400 when
    the body is not a JSON object or ``query`` is not a string; otherwise
    returns the normalized query together with the search results.
    """
    # silent=True yields None for a missing or malformed JSON body, so every
    # bad-input case is reported the same way below.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    query = payload.get('query', '')
    if not isinstance(query, str):
        return jsonify({'error': "'query' must be a string"}), 400

    # Normalize search query
    normalized_query = persian.normalize_persian(query)

    # NOTE(review): search_database is not defined in this snippet; it is
    # assumed to be supplied by the application.
    results = search_database(normalized_query)

    return jsonify({
        'query': normalized_query,
        'results': results,
    })
FastAPI Endpoints#
Use with FastAPI and Pydantic:
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, field_validator
import persian
app = FastAPI()


class Article(BaseModel):
    """Request model whose text fields are validated and stored normalized."""

    title: str
    content: str

    @field_validator('title', 'content')
    @classmethod
    def normalize_persian_text(cls, v: str) -> str:
        """Return the normalized value, or raise if it is not Persian text.

        Raises:
            ValueError: when ``persian.is_persian_text`` rejects the value.
        """
        if persian.is_persian_text(v):
            return persian.normalize_persian(v)
        raise ValueError('Text must contain Persian characters')
@app.post('/articles/')
async def create_article(article: Article):
    """Echo back the validated article's title and content."""
    # Both fields were already normalized by the Article field validator.
    return dict(title=article.title, content=article.content)
Data Processing#
Pandas DataFrames#
Process Persian text in pandas:
import pandas as pd
import persian
# Load data
df = pd.read_csv('articles.csv')

# Normalize all text columns. Empty CSV cells are read as NaN (a float), so
# na_action='ignore' leaves them untouched instead of handing a float to the
# string normalizer.
text_columns = ['title', 'content', 'summary']
for col in text_columns:
    df[col] = df[col].map(persian.normalize_persian, na_action='ignore')

# Convert Persian numbers to English for analysis
df['views_english'] = df['views_persian'].map(
    persian.convert_fa_numbers, na_action='ignore'
)
# Nullable Int64 keeps rows without a view count as <NA>; a plain
# astype(int) raises as soon as one value is missing.
df['views_int'] = pd.to_numeric(df['views_english']).astype('Int64')

# Save cleaned data
df.to_csv('articles_cleaned.csv', index=False)
CSV Processing#
Clean Persian CSV files:
import csv
import persian
def clean_persian_csv(input_file, output_file):
    """Copy a CSV file, normalizing every cell that holds Persian text.

    Args:
        input_file: Path of the UTF-8 CSV to read; its first row is the header.
        output_file: Path of the UTF-8 CSV to write, with the same columns.

    An input without a header row produces an empty output file.
    """
    # newline='' on both files lets the csv module handle line endings itself,
    # including newlines embedded in quoted fields.
    with open(input_file, 'r', encoding='utf-8', newline='') as infile, \
            open(output_file, 'w', encoding='utf-8', newline='') as outfile:
        reader = csv.DictReader(infile)
        if reader.fieldnames is None:
            # Empty input: there is no header to copy and no rows to clean.
            return

        writer = csv.DictWriter(outfile, fieldnames=reader.fieldnames)
        writer.writeheader()

        for row in reader:
            # Rows shorter than the header carry None for the missing cells,
            # so only non-empty strings are offered to the Persian helpers.
            for key, value in row.items():
                if (
                    isinstance(value, str)
                    and value
                    and persian.is_persian_text(value)
                ):
                    row[key] = persian.normalize_persian(value)
            writer.writerow(row)
# Example run: read input.csv and write the cleaned copy to output.csv.
clean_persian_csv(input_file='input.csv', output_file='output.csv')
JSON Processing#
Process Persian content in JSON files:
import json
import persian
def normalize_json_content(data):
    """Return a decoded JSON value with its Persian strings normalized.

    Dicts and lists are rebuilt recursively (dict keys are kept as they are).
    A string is replaced by ``persian.normalize_persian`` of itself only when
    ``persian.is_persian_text`` accepts it; every other value is returned
    unchanged.
    """
    if isinstance(data, str):
        if persian.is_persian_text(data):
            return persian.normalize_persian(data)
        return data

    if isinstance(data, list):
        return [normalize_json_content(element) for element in data]

    if isinstance(data, dict):
        return {
            key: normalize_json_content(value)
            for key, value in data.items()
        }

    return data
# Read and decode the source document.
with open('data.json', 'r', encoding='utf-8') as source:
    data = json.loads(source.read())

# Normalize every Persian string found in the structure.
normalized = normalize_json_content(data)

# Write the result; ensure_ascii=False keeps Persian characters readable
# instead of emitting \uXXXX escapes.
with open('data_normalized.json', 'w', encoding='utf-8') as target:
    target.write(json.dumps(normalized, ensure_ascii=False, indent=2))
Text Analysis#
Word Frequency#
Analyze Persian text frequency:
from collections import Counter
import persian
import re
def analyze_persian_text(text):
    """Return the ten most frequent Persian words in *text*.

    Args:
        text: Raw text; it is normalized before being tokenized.

    Returns:
        A list of ``(word, count)`` tuples, most common first, holding at
        most ten entries.
    """
    # Normalize text
    text = persian.normalize_persian(text)

    # A word is a run of Arabic-block letters, marks and digits. The block's
    # own punctuation (U+0600-U+061F, e.g. the Persian comma, semicolon and
    # question mark; U+066A-U+066D; U+06D4) is left out so that a word followed
    # by a Persian comma counts the same as the bare word. A zero-width
    # non-joiner (U+200C) between two runs keeps them together as one word.
    run = r'[\u0620-\u0669\u066E-\u06D3\u06D5-\u06FF]+'
    pattern = run + r'(?:\u200c' + run + r')*'
    words = re.findall(pattern, text)

    # Count frequency
    return Counter(words).most_common(10)
# Sample sentence in which "سلام" and "من" each occur twice.
text = "سلام سلام به همه دوستان من. من کتاب می خوانم"
top_words = analyze_persian_text(text)
# Prints the (word, count) pairs returned by analyze_persian_text.
print(top_words)
Text Cleaning#
Clean and prepare Persian text for NLP:
import persian
import re
def clean_persian_for_nlp(text):
    """Clean Persian text for NLP processing.

    The text is normalized, stripped of Arabic diacritics, reduced to
    characters in U+0600-U+06FF plus whitespace, and finally has every
    whitespace run collapsed to a single space.

    Args:
        text: Raw input text.

    Returns:
        The cleaned text without leading or trailing whitespace.
    """
    # Normalize Persian text
    text = persian.normalize_persian(text)

    # Remove Arabic diacritics
    text = persian.remove_arabic_diacritics(text)

    # Remove special characters except Persian and space.
    # NOTE(review): U+200C (zero-width non-joiner) lies outside the kept range
    # and is removed too, which joins the halves of half-space compounds;
    # confirm that this is intended.
    text = re.sub(r'[^\u0600-\u06FF\s]', '', text)

    # Collapse whitespace only after the removal above: deleting a token that
    # stood between two spaces ("a - b") would otherwise leave a double space.
    text = re.sub(r'\s+', ' ', text)

    return text.strip()
# Sample input containing ASCII punctuation ("!!!" and ".").
raw_text = "سلام!!! من کتاب می خوانم."
clean_text = clean_persian_for_nlp(raw_text)
print(clean_text) # 'سلام من کتاب میخوانم'
Content Management#
Blog Post Processing#
Prepare blog posts for publication:
import persian
from datetime import datetime
class BlogPost:
    """A blog post whose text fields are normalized when it is constructed."""

    def __init__(self, title, content, author):
        """Store normalized title/author, processed content and a timestamp."""
        self.title = persian.normalize_persian(title)
        self.content = self._process_content(content)
        self.author = persian.normalize_persian(author)
        # Naive local time taken at construction.
        self.created_at = datetime.now()

    def _process_content(self, content):
        """Run the content through each clean-up step, in order."""
        steps = (
            persian.normalize_persian,
            persian.convert_ar_characters,
            persian.convert_ar_numbers,
            persian.convert_fa_spaces,
        )
        for step in steps:
            content = step(content)
        return content

    def to_dict(self):
        """Return the post as a dict, with ``created_at`` as an ISO string."""
        record = {
            name: getattr(self, name)
            for name in ('title', 'content', 'author')
        }
        record['created_at'] = self.created_at.isoformat()
        return record
# Build a sample post; its content contains the Arabic-Indic digits ٣٤٥,
# which BlogPost._process_content passes through persian.convert_ar_numbers.
post = BlogPost(
    title="عنوان مقاله",
    content="محتوای مقاله با شماره ٣٤٥",
    author="نویسنده"
)
SEO URL Generation#
Generate SEO-friendly URLs from Persian titles:
import persian
import re
from urllib.parse import quote
def generate_seo_url(persian_title):
    """Generate an SEO-friendly, URL-encoded slug from a Persian title.

    Args:
        persian_title: The title to convert.

    Returns:
        The percent-encoded slug: words joined by single hyphens, with no
        leading or trailing hyphen.
    """
    # Normalize
    title = persian.normalize_persian(persian_title)

    # Remove diacritics
    title = persian.remove_arabic_diacritics(title)

    # Turn each whitespace run into ONE hyphen; stripping first keeps
    # surrounding spaces from becoming edge hyphens.
    slug = re.sub(r'\s+', '-', title.strip())

    # Remove everything outside U+0600-U+06FF and the hyphen.
    slug = re.sub(r'[^\u0600-\u06FF\-]', '', slug)

    # Dropping a token that stood between hyphens ("a - b" -> "a---b"), or at
    # either end, leaves doubled or edge hyphens; tidy them up.
    slug = re.sub(r'-{2,}', '-', slug).strip('-')

    # URL encode
    return quote(slug)
# Sample title passed through generate_seo_url; the result is percent-encoded.
title = "آموزش پایتون برای مبتدی ها"
url = generate_seo_url(title)
print(url) # URL-encoded Persian slug
Search Implementation#
Full-Text Search#
Implement Persian text search:
import persian
class PersianSearch:
    """In-memory substring search over documents, compared in normalized form."""

    def __init__(self, documents):
        """Keep each document alongside its normalized text."""
        self.documents = [
            {'original': text, 'normalized': persian.normalize_persian(text)}
            for text in documents
        ]

    def search(self, query):
        """Return the original documents whose normalized text contains the
        normalized query, in their stored order."""
        needle = persian.normalize_persian(query)
        return [
            entry['original']
            for entry in self.documents
            if needle in entry['normalized']
        ]
# Usage: index three sample documents, then search for the word "کتاب",
# which appears in the first and third one.
documents = [
    "سلام من کتاب می خوانم",
    "درس پایتون خیلی جالب است",
    "کتاب های زیادی دارم"
]
search_engine = PersianSearch(documents)
results = search_engine.search("کتاب")
# Prints the list of matching original documents.
print(results)
Fuzzy Matching#
Implement fuzzy search for Persian text:
import persian
from difflib import SequenceMatcher
def fuzzy_search_persian(query, documents, threshold=0.6):
    """Rank documents by similarity to a query, both compared normalized.

    Args:
        query: Text to look for.
        documents: Iterable of candidate strings.
        threshold: Minimum ``SequenceMatcher`` ratio a document must reach.

    Returns:
        A list of ``(document, similarity)`` tuples for documents at or above
        the threshold, highest similarity first.
    """
    needle = persian.normalize_persian(query)

    scored = []
    for candidate in documents:
        matcher = SequenceMatcher(
            None, needle, persian.normalize_persian(candidate)
        )
        score = matcher.ratio()
        if score < threshold:
            continue
        scored.append((candidate, score))

    # Best match first.
    return sorted(scored, key=lambda pair: pair[1], reverse=True)
documents = ["کتاب پایتون", "کتابخانه پایتون", "آموزش جاوا"]

# The query is deliberately misspelled ("پایتن"); a lower threshold than the
# default is used for the lookup.
results = fuzzy_search_persian("کتاب پایتن", documents, threshold=0.5)

# One line per match: the document followed by its score to two decimals.
for doc, score in results:
    print(f"{doc}: {score:.2f}")
Form Validation#
Contact Form#
Validate contact form with Persian content:
import persian
import re
def validate_persian_contact_form(data):
    """Validate and normalize a contact form whose text must be Persian.

    Args:
        data: Mapping with optional ``name``, ``message`` and ``phone``
            entries. It is not modified.

    Returns:
        A tuple ``(errors, cleaned)``. ``errors`` maps a field name to its
        Persian error message. ``cleaned`` is a copy of ``data`` in which a
        valid name and message are normalized and a phone containing Persian
        digits is passed through ``persian.convert_fa_numbers``.
    """
    errors = {}
    # Work on a copy so a failed validation cannot leave the caller's dict
    # half-normalized.
    cleaned = dict(data)

    # Validate name (a missing or None value is treated as empty).
    name = cleaned.get('name') or ''
    if not persian.is_persian_text(name):
        errors['name'] = 'نام باید به فارسی باشد'
    else:
        cleaned['name'] = persian.normalize_persian(name)

    # Validate message: length is checked first, on the raw text.
    message = cleaned.get('message') or ''
    if len(message) < 10:
        errors['message'] = 'پیام باید حداقل ۱۰ کاراکتر باشد'
    elif not persian.is_persian_text(message):
        errors['message'] = 'پیام باید به فارسی باشد'
    else:
        cleaned['message'] = persian.normalize_persian(message)

    # The phone is not validated here; Persian digits are only converted so
    # that the stored value uses one digit set.
    phone = cleaned.get('phone') or ''
    if persian.contains_persian_digits(phone):
        cleaned['phone'] = persian.convert_fa_numbers(phone)

    return errors, cleaned
# Sample submission: Persian name and message, phone written in Persian digits.
form_data = dict(
    name='رضا کمالی',
    message='سلام من یک پیام دارم',
    phone='۰۹۱۲۳۴۵۶۷۸۹',
)

errors, cleaned_data = validate_persian_contact_form(form_data)

# An empty errors dict means every check passed.
if not errors:
    print("Form is valid:", cleaned_data)
Registration Form#
Process user registration with Persian data:
import persian
class UserRegistration:
    """Validate and normalize the Persian fields of a user registration.

    Attributes are always present after construction: ``first_name`` and
    ``last_name`` hold the normalized value, or ``None`` when the field failed
    validation; ``errors`` lists the validation messages.
    """

    def __init__(self, first_name, last_name, bio):
        """Validate both names and normalize every supplied field."""
        self.errors = []
        # Defined up front so reading them after a failed validation gives
        # None instead of raising AttributeError.
        self.first_name = None
        self.last_name = None

        # Validate and normalize names
        if not persian.is_persian_text(first_name):
            self.errors.append('First name must be in Persian')
        else:
            self.first_name = persian.normalize_persian(first_name)

        if not persian.is_persian_text(last_name):
            self.errors.append('Last name must be in Persian')
        else:
            self.last_name = persian.normalize_persian(last_name)

        # Bio is optional but should be normalized
        self.bio = persian.normalize_persian(bio) if bio else ''

    def is_valid(self):
        """Return True when no validation error was recorded."""
        return not self.errors

    def save(self):
        """Save the user if valid; return whether anything was saved."""
        if not self.is_valid():
            return False
        # Save to database (placeholder: the example only prints).
        print(f"Saving: {self.first_name} {self.last_name}")
        return True
# Register a sample user; all three fields are Persian text.
user = UserRegistration(first_name="رضا", last_name="کمالی", bio="برنامه نویس پایتون")

# Persist only when validation recorded no errors.
if user.is_valid():
    user.save()
Batch Processing#
Process Multiple Files#
Batch process Persian text files:
import persian
from pathlib import Path
def batch_normalize_files(input_dir, output_dir):
    """Normalize all ``*.txt`` files in a directory.

    Args:
        input_dir: Directory whose top-level ``*.txt`` files are read as UTF-8.
        output_dir: Directory that receives the normalized files under the
            same names; it is created, with any missing parents, if needed.
    """
    input_path = Path(input_dir)
    output_path = Path(output_dir)
    # parents=True allows a nested destination such as "out/normalized".
    output_path.mkdir(parents=True, exist_ok=True)

    # sorted() gives a stable processing order and takes the listing before
    # any output file is written.
    for file in sorted(input_path.glob('*.txt')):
        if not file.is_file():
            # A directory that happens to be named "*.txt" is not text input.
            continue

        # Read file
        content = file.read_text(encoding='utf-8')

        # Normalize
        normalized = persian.normalize_persian(content)

        # Write to output
        output_file = output_path / file.name
        output_file.write_text(normalized, encoding='utf-8')
        print(f"Processed: {file.name}")
# Example run: normalize every .txt file from input_texts into output_texts.
batch_normalize_files(input_dir='input_texts', output_dir='output_texts')
Parallel Processing#
Process large datasets in parallel:
import persian
from multiprocessing import Pool
import pandas as pd
def normalize_row(row):
    """Normalize the ``title`` and ``content`` entries of one row in place.

    Returns the same row object so it can be used with ``map``.
    """
    for column in ('title', 'content'):
        row[column] = persian.normalize_persian(row[column])
    return row
def parallel_normalize(csv_file, output_file, workers=4):
    """Normalize a CSV file using a pool of worker processes.

    Args:
        csv_file: Path of the CSV to read.
        output_file: Path of the CSV to write (without the index column).
        workers: Number of processes in the pool.
    """
    frame = pd.read_csv(csv_file)

    # One task per row; pool.map returns the results in the original order.
    rows = [record for _index, record in frame.iterrows()]
    with Pool(workers) as pool:
        processed = pool.map(normalize_row, rows)

    pd.DataFrame(processed).to_csv(output_file, index=False)
# Start methods that re-import the main module in each worker ("spawn", the
# default on Windows and macOS) would execute an unguarded call again in
# every child process, so the entry point must be protected.
if __name__ == '__main__':
    parallel_normalize('large_dataset.csv', 'normalized_dataset.csv')
Testing Helpers#
Unit Test Utilities#
Helper functions for testing Persian text:
import unittest
import persian
class PersianTestCase(unittest.TestCase):
    """``unittest.TestCase`` with extra assertions for Persian text."""

    def assertPersianText(self, text, msg=None):
        """Fail unless ``persian.is_persian_text`` accepts *text*.

        Raises:
            AssertionError: with *msg* if given, else a default message.
        """
        if persian.is_persian_text(text):
            return
        failure_message = msg or f"{text} is not Persian text"
        raise AssertionError(failure_message)

    def assertNormalized(self, text, expected):
        """Fail unless normalizing *text* yields exactly *expected*."""
        self.assertEqual(persian.normalize_persian(text), expected)
class MyTest(PersianTestCase):
    """Example test case built on the PersianTestCase helpers."""

    def test_user_input(self):
        """Input with Arabic-Indic digits is Persian text and normalizes to
        the same word followed by Persian digits."""
        raw = "سلام ٣٤٥"
        expected = "سلام ۳۴۵"
        self.assertPersianText(raw)
        self.assertNormalized(raw, expected)
Next Steps#
See Core Functions for the complete API reference
See Utility Functions for the detection utilities
Read the Migration Guide when upgrading from older versions