#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
(c) 2017 Martin Wendt; see https://github.com/mar10/fabulist
Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
"""
from __future__ import print_function
import os
import random
def _get_count(int_or_range):
"""Return random int for given range (or int if a simple value was passed)."""
if type(int_or_range) is int:
return int_or_range
return random.randint(*int_or_range)
# -------------------------------------------------------------------------------------------------
# LoremDialect
# -------------------------------------------------------------------------------------------------
class LoremDialect(object):
    """Sentence and word pool of one lorem-ipsum dialect, read from a text file.

    The file is read lazily by :meth:`load`: blank lines and lines starting
    with ``#`` are skipped, a line starting with ``---`` ends the current
    paragraph, and every other line is treated as one sentence.

    Args:
        dialect (str): "lorem", "pulp", ...
        path (str): Path of the text file that holds the sentences.

    Attributes:
        paragraphs (list(list(str)) or None):
            Non-empty paragraphs, each a list of sentences (None until loaded).
        sentences (list(str) or None):
            Unique sentences in order of first appearance (None until loaded).
        words (list(str) or None):
            Sorted, unique, lower-cased words (None until loaded).

    NOTE(review): the previous docstring carried the example
    ``$(TYPE:MODS:#foo|bar:=NUM)``; nothing in this class parses that syntax,
    so it presumably belongs to a caller -- confirm.
    """

    def __init__(self, dialect, path):
        self.dialect = dialect
        self.path = path
        # All pools stay None until load() has completed successfully.
        self.paragraphs = None
        self.sentences = None
        self.words = None

    def load(self):
        """Read the dialect file and (re)build paragraphs, sentences and words."""
        paragraphs = []
        sentences = []
        seen_sentences = set()
        words = set()
        para = []

        # Use a context manager, so the file handle is closed deterministically.
        with open(self.path, "rt") as fp:
            for line in fp:
                # Skip empty lines and comments (i.e. starting with '#')
                line = line.strip()
                if not line or line.startswith("#"):
                    continue
                # Paragraphs are delimited by a '---' line
                if line.startswith("---"):
                    # Ignore empty paragraphs (leading or repeated delimiter):
                    # an empty sentence pool would make _generate_sentences()
                    # fail with ZeroDivisionError for entropy=1.
                    if para:
                        paragraphs.append(para)
                    para = []
                    continue
                para.append(line)
                # Also collect a flat list of all (unique) sentences
                if line not in seen_sentences:
                    seen_sentences.add(line)
                    sentences.append(line)
                # Also collect a set of words
                for word in line.split(" "):
                    word = word.strip(" \t\n,.!?;:-").lower()
                    if word:
                        words.add(word)
        if para:
            paragraphs.append(para)

        # Publish the result only after the file was read completely, so a
        # failed load() leaves `paragraphs` as None and can be retried.
        self.paragraphs = paragraphs
        self.sentences = sentences
        # Sorted, because set iteration order depends on string hash
        # randomization; this keeps results reproducible for a seeded `random`.
        self.words = sorted(words)
        return

    def _generate_sentences(self, entropy=0, keep_first=True, count=None):
        """Generate a sequence of sentences.

        The dialect file is loaded on first use.

        Args:
            entropy (int, optional):
                0: generate sentences in original order (wrapping around)
                1: pick random paragraph, then use sentences in order
                2: pick random sentence
                Any other value is handled like 0.
                Default: 0.
            keep_first (bool, optional):
                Always return the first sentence of the text as first result.
                Default: True.
            count (int, optional):
                Number of sentences. Pass None for infinite.
                Default: None.
        Yields:
            str: Next sentence.
        """
        if self.paragraphs is None:
            self.load()

        pool_idx = 0
        pool_remain = 0
        sentence_pool = self.sentences
        n_sentences = 0

        while count is None or n_sentences < count:
            if entropy == 1 and pool_remain == 0:
                # Pick random paragraph, then use sentences in order
                sentence_pool = random.choice(self.paragraphs)
                pool_remain = len(sentence_pool)
                pool_idx = 0

            if entropy == 2:
                # Pick random sentence
                sentence = random.choice(sentence_pool)
            else:
                # Generate sentences in original order
                sentence = sentence_pool[pool_idx % len(sentence_pool)]
            pool_idx += 1
            pool_remain -= 1

            if keep_first:
                keep_first = False
                first = self.sentences[0]
                if sentence != first:
                    # Start with first sentence
                    yield first
                    n_sentences += 1
                    # The prepended sentence counts as well: without this
                    # check, `count + 1` sentences would be generated.
                    if count is not None and n_sentences >= count:
                        return

            yield sentence
            n_sentences += 1
        return
# -------------------------------------------------------------------------------------------------
# LoremGenerator
# -------------------------------------------------------------------------------------------------
class LoremGenerator(object):
    """Generate lorem ipsum text in a given dialect.

    Args:
        data_folder (str):
            Directory that is scanned for dialect files. Every entry whose
            name starts with ``lorem_`` is registered; the dialect name is the
            file name without that prefix and without extension.

    Attributes:
        dialect_map (dict(dialect, LoremDialect)):
            Holds all available lorem-ipsum dialects
        root_path (str):
            The `data_folder` that was passed to the constructor.
    """

    def __init__(self, data_folder):
        self.dialect_map = {}
        self.root_path = data_folder
        # Find all available dialects and add to map(dialect => path).
        # The files are only registered here and loaded lazily on first use.
        prefix = "lorem_"
        for name in os.listdir(self.root_path):
            if name.startswith(prefix):
                dialect = os.path.splitext(name)[0][len(prefix):]
                path = os.path.join(self.root_path, name)
                self.dialect_map[dialect] = LoremDialect(dialect, path)
        return

    def _get_lorem(self, dialect):
        """Return a LoremDialect instance and load data or raise ValueError.

        Args:
            dialect (str or None): Dialect name. None picks a random dialect.
        Returns:
            LoremDialect: The (loaded) dialect.
        Raises:
            ValueError: if the dialect is unknown (or no dialect is available).
        """
        # Guard against an empty map: random.choice() would raise IndexError
        # instead of the documented ValueError.
        if dialect is None and self.dialect_map:
            dialect = random.choice(list(self.dialect_map.keys()))
        lorem = self.dialect_map.get(dialect)
        if lorem is None:
            raise ValueError(
                "Unknown dialect {!r} (expected {})".format(
                    dialect, ", ".join(self.dialect_map.keys())
                )
            )
        if lorem.paragraphs is None:
            lorem.load()
        return lorem

    def generate_words(self, count=None, dialect="ipsum", entropy=3, keep_first=False):
        """Yield <count> random words.

        Args:
            count (int, optional):
                Number of words. Pass None for infinite.
                Default: None.
            dialect (str, optional):
                For example "ipsum", "pulp", "trappatoni". Pass `None` to pick a random dialect.
                Default: "ipsum" (i.e. lorem-ipsum).
            entropy (int, optional):
                0: iterate words from original text
                1: pick a random paragraph, then use it literally
                2: pick a random sentence, then use it literally
                3: pick random words
                Default: 3.
            keep_first (bool, optional):
                Always return the words of the first sentence as first result.
                Not supported for entropy=3.
                Default: False.
        Yields:
            str: Random word (lower case, without surrounding punctuation).
        Raises:
            ValueError: if the dialect is unknown.
            NotImplementedError: if `keep_first` is combined with entropy=3.
        """
        lorem = self._get_lorem(dialect)
        i = 0

        if entropy == 3:
            # Pick random words
            if keep_first:
                raise NotImplementedError("keep_first is not supported for entropy=3")
            while count is None or i < count:
                yield random.choice(lorem.words)
                i += 1
            return

        # Otherwise pop words from sentence sequence
        for s in lorem._generate_sentences(keep_first=keep_first, entropy=entropy):
            for word in s.split(" "):
                if count is not None and i >= count:
                    return
                # Strip the same characters as LoremDialect.load() does when
                # it collects the vocabulary, so that e.g. a trailing comma
                # is not returned as part of a word.
                word = word.strip(" \t\n,.!?;:-")
                if word:
                    yield word.lower()
                    i += 1
        return

    def generate_sentences(
        self,
        count=None,
        dialect="ipsum",
        entropy=2,
        keep_first=False,
        words_per_sentence=(3, 15),
    ):
        """Yield <count> random sentences.

        Args:
            count (int, optional):
                Number of sentences. Pass None for infinite.
                Default: None.
            dialect (str, optional):
                For example "ipsum", "pulp", "trappatoni". Pass `None` to pick a random dialect.
                Default: "ipsum" (i.e. lorem-ipsum).
            entropy (int, optional):
                0: iterate sentences from original text
                1: pick a random paragraph, then iterate sentences
                2: pick a random sentence
                3: mix random words
                Default: 2.
            keep_first (bool, optional):
                Always return the first sentence as first result.
                Default: False.
            words_per_sentence (int or tuple(min, max), optional):
                Number of words per sentence (capped at the size of the
                vocabulary, because words are not repeated within a sentence).
                This argument is only used for entropy=3.
                Default: (3, 15).
        Yields:
            str: Random sentence.
        Raises:
            ValueError: if the dialect is unknown or if entropy=3 is used
                without `words_per_sentence`.
        """
        lorem = self._get_lorem(dialect)

        if entropy == 3:
            # Generate from random words.
            # Validate before anything is yielded.
            if not words_per_sentence:
                raise ValueError(
                    "entropy=3 requires words_per_sentence arg: int or a tuple(min, max)"
                )
            i = 0
            # The first sentence counts as well, so honor count=0
            if keep_first and (count is None or count > 0):
                yield lorem.sentences[0]
                i += 1
            while count is None or i < count:
                # random.sample() raises ValueError if more items are
                # requested than available, so cap at the vocabulary size.
                n_words = min(_get_count(words_per_sentence), len(lorem.words))
                sentence = random.sample(lorem.words, n_words)
                sentence = " ".join(sentence).capitalize() + "."
                yield sentence
                i += 1
            return

        # entropy = 0..2: use sentences from original text
        for i, s in enumerate(
            lorem._generate_sentences(keep_first=keep_first, entropy=entropy)
        ):
            # count=None means 'infinite' (comparing `i >= None` would raise
            # TypeError on Python 3).
            if count is not None and i >= count:
                break
            yield s
        return

    def generate_paragraphs(
        self,
        count=None,
        dialect="ipsum",
        entropy=2,
        keep_first=False,
        words_per_sentence=(3, 15),
        sentences_per_para=(2, 6),
    ):
        """Generate a number of paragraphs, made up from random sentences.

        Args:
            count (int, optional):
                Number of paragraphs. Pass None for infinite.
                Default: None.
            dialect (str, optional):
                For example "ipsum", "pulp", "trappatoni". Pass `None` to pick a random dialect.
                Default: "ipsum".
            entropy (int):
                0: iterate original text
                1: pick a random paragraph, then use it literally
                2: mix a random sentences
                3: mix random words
                Default: 2.
            keep_first (bool, optional):
                Always return the first sentence as first result. Default: False.
            words_per_sentence (int or tuple(min, max), optional):
                Number of words per sentence.
                This argument is only used for entropy=3.
                Default: (3, 15).
            sentences_per_para (int or tuple(min, max), optional):
                Number of sentences per paragraph.
                Default: (2, 6).
        Yields:
            str: Random paragraph (sentences joined by a single space).
        """
        # NOTE(review): every paragraph is built from a fresh
        # generate_sentences() call, so for entropy=0 each paragraph starts
        # again at the first sentence of the text, and for dialect=None the
        # dialect is picked anew for every paragraph -- confirm this is intended.
        i = 0
        while count is None or i < count:
            n_sents = _get_count(sentences_per_para)
            para = self.generate_sentences(
                n_sents, dialect, entropy, keep_first, words_per_sentence
            )
            para = " ".join(para)
            yield para
            i += 1
            # Only the very first paragraph starts with the first sentence
            keep_first = False
        return
if __name__ == "__main__":
    # Ad-hoc demo: print ten random sentences of the default dialect.
    # The dialect files are expected in a `data` folder next to this module.
    #
    # A leftover debugging `exit()` used to stop the script right after the
    # generator was created, which made the demo loop below unreachable (and
    # `exit` is a helper injected by the `site` module, not meant for scripts).
    data_folder = os.path.join(os.path.dirname(__file__), "data")
    lorem = LoremGenerator(data_folder)

    # More usage examples:
    #
    # print(list(lorem.generate_words(10)))
    # print(list(lorem.generate_words(10, entropy=0)))
    # print(list(lorem.generate_words(10, entropy=1)))
    # print(list(lorem.generate_words(10, entropy=2)))
    # print(list(lorem.generate_words(10, entropy=3)))
    #
    # print(list(lorem.generate_sentences(3, entropy=3, words_per_sentence=(5,8))))
    # print(list(lorem.generate_sentences(3, entropy=3, words_per_sentence=5)))
    # print(list(lorem.generate_sentences(3, entropy=3, words_per_sentence=5, keep_first=True)))
    #
    # print(list(lorem.generate_sentences(3, dialect="pulp", entropy=2, keep_first=True)))
    #
    # print()
    # print("\n".join(lorem.generate_paragraphs(3)))
    # print()
    # print("\n".join(lorem.generate_paragraphs(3, dialect="pulp", entropy=2, keep_first=True)))
    # print()
    # print("\n".join(lorem.generate_paragraphs(3, dialect="pulp", entropy=3, keep_first=True)))

    gen = lorem.generate_sentences(10, keep_first=False, entropy=2)
    for s in gen:
        print("-", s)